import rpy2
import rpy2.robjects as ro
ro.r(".libPaths")("/public1/software/liuzj/softwares/anaconda3/envs/sc_py/lib/R/library")
from rpy2.robjects.packages import importr
import rpy2.ipython.html
rpy2.ipython.html.init_printing()
from jpy_tools.rTools import py2r, r2py, r_inline_plot, rHelp, trl, rSet, rGet, ad2so, so2ad, so2md, rcontext, Trl, r2py_re, py2r_re
from jpy_tools import loadPkl, toPkl
# Load the R packages used in this notebook through rpy2's importr.
rBase = importr('base')
rUtils = importr('utils')
dplyr = importr('dplyr')
reticulate = importr('reticulate')
# Short aliases: R is rpy2's R evaluator (ro.r); T is a jpy_tools Trl instance.
R = ro.r
T = Trl()
# Set R-side options: browser and Shiny port.
R("options(browser='firefox', shiny.port=6533)")
%load_ext rpy2.ipython
WARNING: The R package "reticulate" only fixed recently
an issue that caused a segfault when used with rpy2:
https://github.com/rstudio/reticulate/pull/1188
Make sure that you use a version of that package that includes
the fix.
<Figure size 72x72 with 0 Axes>
from jpy_tools import singleCellTools
Global seed set to 0
import pandas as pd
import scanpy as sc
import numpy as np
import scipy.sparse as ss
from cool import F
import muon as mu
import patchworklib as pw
%config InlineBackend.figure_format = 'retina'
import seaborn as sns
from matplotlib import pyplot as plt
import matplotlib.ticker as ticker
import matplotlib.font_manager as font_manager
# plt.rcParams['figure.dpi'] = 150
# Register every font file found in the custom font directory with
# matplotlib's font manager, then select Arial as the default family
# (presumably Arial is shipped in that directory -- confirm).
font_dirs = ["/public/home/mowp/test/fonts/"]
font_files = font_manager.findSystemFonts(fontpaths=font_dirs)
for font_file in font_files:
    font_manager.fontManager.addfont(font_file)
plt.rcParams.update({"font.family": "Arial"})
sns.despine(right=True, top=True)
from itertools import product
from functools import reduce
import patchworklib as pw
from jpy_tools.otherTools import pwRecoverSeaborn, pwStack, F, pwShow
# Replace patchworklib's show with the jpy_tools implementation.
pw.show = pwShow
fc_recoverSns = pwRecoverSeaborn()
import seaborn.objects as so
from cycler import cycler
# Style dict: seaborn "ticks" base, frameless legend, no top/right spines,
# and a property cycle consisting of a single grey colour.
dt_snsStyle = {**sns.axes_style('ticks'), "legend.frameon": False, "axes.spines.top":False, "axes.spines.right":False, "axes.prop_cycle":cycler(color=['grey'])}
<Figure size 432x288 with 0 Axes>
# Scratch directory and result directory used by this analysis.
dir_temp = '/public/home/liuzj/projects/singleCell/02_jupyter/soybean/0510_combine/'
dir_result = "/public/home/liuzj/projects/singleCell/soybean/02_result/20210922/analysis/noduleWithRoot/"
# Maps 'Large Nodule' / 'Small Nodule' to 'Mature Nodule' / 'Developing Nodule'.
dt_renameSample = {
"Root": "Root",
"Large Nodule": "Mature Nodule",
"Small Nodule": "Developing Nodule",
}
# NOTE(review): this rebinds dt_renameSample, so the mapping defined just
# above is no longer reachable under this name. The two dicts look like
# successive renaming steps (the keys here are the values there) -- confirm
# whether the first one should be kept under a separate name.
dt_renameSample = {
"Root": "Root",
"Mature Nodule": "Nodule (21 dpi)",
"Developing Nodule": "Nodule (12 dpi)",
}
# ad.obs['Sample_time'] = ad.obs['Sample_new'].map(dt_renameSample).cat.set_categories(['Nodule (21 dpi)', 'Nodule (12 dpi)', 'Root'])
# --- Soybean gene symbols -------------------------------------------------
# Table indexed by geneID with the Symbol and other_designations columns;
# only the first row of each duplicated geneID is kept.
df_symbol = pd.read_table("/data/Zhaijx/liuzj/data/soybase_IPF/ncbi_locus/soybean_gene_symbol_name.txt").set_index('geneID')[['Symbol', 'other_designations']]
df_symbol = df_symbol.loc[~df_symbol.index.duplicated()]
# geneID -> Symbol, skipping symbols that start with 'LOC'.
dt_symbol = df_symbol.loc[~df_symbol['Symbol'].str.startswith('LOC')]['Symbol'].to_dict()

# --- SoyBase annotation ---------------------------------------------------
df_soybase = pd.read_table(
    "/data/Zhaijx/liuzj/data/soybase_IPF/soybase_genome_annotation_v2.0_09-27-2021.txt",
    skiprows=11,
)
# Protein ID (prefix rewritten from 'Glyma.' to 'GLYMA_') -> top Arabidopsis
# BLASTP hit.  regex=False makes the '.' a literal dot regardless of the
# pandas version (the default of `regex` changed between pandas 1.x and 2.x).
dt_soybase = (
    df_soybase.assign(
        **{
            "Gmax 2.0 Primary Protein ID (1)": lambda df: df[
                "Gmax 2.0 Primary Protein ID (1)"
            ].str.replace("Glyma.", "GLYMA_", regex=False)
        }
    )
    .set_index("Gmax 2.0 Primary Protein ID (1)")[
        "Top Arabidopsis (TAIR10) BLASTP Hit (4)"
    ]
    .to_dict()
)

# --- Arabidopsis gene descriptions -----------------------------------------
df_arabDesc = pd.read_table('/data/Zhaijx/liuzj/data/Araport11/original/arab_gene_desc.txt')
# NOTE(review): this dict is keyed by the row index and its values are the
# 'Gene Model Name' strings.  If a gene -> description lookup was intended,
# the frame probably needs set_index('Gene Model Name') first -- confirm.
dt_arabDesc = df_arabDesc['Gene Model Name'].to_dict()
# Build the Glyma 1.1 -> Glyma 2.0 locus lookup from the correspondence table.
_df_locusMap = pd.read_table(
    "/data/Zhaijx/liuzj/data/soybase_IPF/Glyma_11_to_Glyma_20_Correspondence_Full.csv",
    skiprows=1,
)
# Old IDs get a dot after the 'Glyma' prefix; new IDs get an upper-case 'G'.
_df_locusMap["Glyma 1.1"] = _df_locusMap["Glyma 1.1"].str.replace("Glyma", "Glyma.")
_df_locusMap["Glyma2.0"] = _df_locusMap["Glyma2.0"].str.replace("g", "G")
dt_oldLocusToNew = _df_locusMap.set_index("Glyma 1.1")["Glyma2.0"].to_dict()

# other rules
_dt_manualLocus = {
    "Glyma.15G19630": "Glyma.15G178100",
    "Glyma.07G04510": "Glyma.07G040300",
    "Glyma.6g04220": "Glyma.06G039100",
    "Glyma.8g01770": "Glyma.08G014500",
    "Glyma.7G133000": "Glyma.07G133000",
    "Glyma.5G126200": "Glyma.05G126200",
}
dt_oldLocusToNew.update(_dt_manualLocus)

# Add spelling variants of every key, one pass per rule and in this order:
# first 'g' -> 'G', then the prefix without the dot.
for _src, _dst in (('g', 'G'), ('Glyma.', 'Glyma')):
    dt_oldLocusToNew.update(
        {_key.replace(_src, _dst): _val for _key, _val in dt_oldLocusToNew.items()}
    )
# Load the supplemental table; it has a two-row header, read as a column
# MultiIndex.
df_nodulationRelatedGene = pd.read_excel(
    '/data/Zhaijx/liuzj/data/nodulation_related_gene/TPC2019-RA-00279_Supplemental_Data_Set_1.xlsx',
    skiprows=1,
    header=[0, 1],
)
# Keep the columns of interest.  The explicit .copy() makes the result an
# independent frame, so the column assignments performed on it later do not
# act on a slice of the freshly loaded table (SettingWithCopyWarning).
df_nodulationRelatedGene = df_nodulationRelatedGene[
    [
        ("Gene Symbol", "Unnamed: 0_level_1"),
        ("Phenotypic Defect", "Infection"),
        ("Phenotypic Defect", "Nodule Organogenesis "),
        ("Phenotypic Defect", "Fixation"),
        ("Reference", "Unnamed: 4_level_1"),
        ("Protein class/Molecular function", "Unnamed: 5_level_1"),
        ("Gene Name", "Unnamed: 6_level_1"),
        ("Alternate names", "Unnamed: 7_level_1"),
        ("Mutagen", "Unnamed: 8_level_1"),
        ("Medicago Gene ID", "Unnamed: 9_level_1"),
        ("Probeset", "Unnamed: 10_level_1"),
        ("Lotus Gene ID", "Unnamed: 11_level_1"),
        ("Soybean Gene ID", "Unnamed: 12_level_1"),
        ("Phaseolus Gene ID", "Unnamed: 13_level_1"),
        ("Myc Phenotype", "Unnamed: 14_level_1"),
    ]
].copy()
def _rename(x):
    """Flatten one two-level column label into a single string.

    Parameters
    ----------
    x : sequence
        Column label whose first two items are the top-level and the
        second-level header strings.

    Returns
    -------
    str
        The top-level header when the second level starts with 'Unnamed';
        otherwise the second-level header with 'Phenotype' appended
        (no separator is inserted).
    """
    top, sub = x[0], x[1]
    if sub.startswith('Unnamed'):
        return top
    return sub + 'Phenotype'
# Collapse the two-level header into flat column names via _rename.
df_nodulationRelatedGene.columns = df_nodulationRelatedGene.columns.map(_rename)
df_nodulationRelatedGene
| Gene Symbol | InfectionPhenotype | Nodule Organogenesis Phenotype | FixationPhenotype | Reference | Protein class/Molecular function | Gene Name | Alternate names | Mutagen | Medicago Gene ID | Probeset | Lotus Gene ID | Soybean Gene ID | Phaseolus Gene ID | Myc Phenotype | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | GmACP | NaN | Nod+/- | NaN | Wang et al., 2014 | Carrier protein | ACYL CARRIER PROTEIN (ACP) | NaN | RNAi | Medtr7g080390 | Mtr.48571.1.S1_at | Lj1g3v2840250 | Glyma.18G244300 | Phvul.008G052400 | NaN |
| 1 | PvAGO5, GmAGO5 | Inf- | Nod+/-, White++ | NaN | Reyero-Saavedra et al., 2017 | Multidomain protein (RNA silencing) | ARGONAUTE 5 (AGO5) | NaN | RNAi | Medtr4g056430, Medtr4g056470 | Mtr.45521.1.S1_at | Lj0g3v0169039.1 | Glyma.12G083500 | Phvul.011G088200 | NaN |
| 2 | LjAMSH | Inf+/-, Inf* | Nod+/- | Fix* | Malolepszy et al., 2015 | Enzyme (Metalloprotease; Deubiquitinating) | ASSOCIATED MOLECULE WITH THE SH3 DOMAIN OF STA... | NaN | LORE1 | Medtr6g083940 | Mtr.21924.1.S1_at; Mtr.21925.1.S1_s_at | Lj2g3v0721190 | Glyma.07G093100 | Phvul.003G094300 | Myc+ |
| 3 | LjAMT1.1 | NaN | Nod++ | Fix+/- | Rogato et al., 2008 | Transporter (Ammonium) | AMMONIUM TRANSPORTER 1.1 (AMT1.1) | NaN | Antisense | Medtr1g045550 | Mtr.10556.1.S1_at | Lj5g3v1314550, Lj0g3v0302059 | Glyma.10G132300 | Phvul.007G231700 | NaN |
| 4 | PvANN1 | Inf+/- | Nod+/- | Fix+/- | Carrasco-castilla et al., 2018 | Membrane binding protein | ANNEXIN1 (ANN1) | NaN | RNAi | Medtr8g038210 | Mtr.14183.1.S1_at | Lj0g3v0203419 | Glyma.13G199800 | Phvul.011G209300 | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 202 | MtZPT2 | NaN | Nod* | Fix- | Frugier et al., 2000 | TF (Zinc-finger motif containing) | Krüppel-like Cys-2/His-2 ZINC FINGER PROTEIN (... | NaN | Antisense | Medtr3g102980 | Mtr.37495.1.S1_at | - | Glyma.06G045400 | Phvul.009G070800 | NaN |
| 203 | MtZR1 | NaN | Nod* | Fix+/- | Hopkins et al., 2013 | Unknown protein | ZINC RIBBON protein 1 (ZR1) | NaN | RNAi | Medtr3g086740 | Msa.965.1.S1_at | - | Glyma18g01770 | Phvul.001G254500, Phvul.011G068500 | NaN |
| 204 | MtγECS | NaN | Nod+ | Fix+/- | El Msehli et al., 2011 | Enzyme (Synthetase) | γ‐GLUTAMYLCYSTEINE SYNTHETASE (γECS) | NaN | RNAi | Medtr8g098350 | Mtr.26622.1.S1_at | Lj4g3v2951270 | Glyma.05G207600 | Phvul.002G289200 | NaN |
| 205 | Gs52 | NaN | Nod+/-, Nod* | Fix* | Govindarajulu M, 2009 | Enzyme (Ecto-apyrase) | Glycine sojae 52 (GS52) | NaN | RNAi | Medtr7g085200 | - | Lj1g3v3948070 | Glyma.16G043300 | - | NaN |
| 206 | MsNADH-GOGAT | NaN | Nod++, Nod* | Fix+/- | Cordoba et al., 2003 | Enzyme (NADH-GOGAT) | GLUTAMINE SYNTHETASE (GS)/NADH‐dependent GLUTA... | NaN | Antisense | Medtr1g027020 | Mtr.42795.1.S1_at | Lj0g3v0129059 | Glyma.06G127400 | Phvul.001G076400 | NaN |
207 rows × 15 columns
# Extract every soybean locus from the free-text ID column.  The '.' in the
# pattern is deliberately a wildcard, so both 'Glyma.18G244300' and the
# dot-less 'Glyma18g01770' spelling are captured.
df_nodulationRelatedGene['Soybean Gene Parsed ID'] = df_nodulationRelatedGene['Soybean Gene ID'].str.findall(r"Glyma.[\w]+\b")
# One row per extracted locus.
df_nodulationRelatedGene = df_nodulationRelatedGene.explode('Soybean Gene Parsed ID')
# Translate IDs through dt_oldLocusToNew; IDs without an entry stay as they are.
df_nodulationRelatedGene['Soybean Gene Parsed ID'] = df_nodulationRelatedGene['Soybean Gene Parsed ID'].map(lambda x:dt_oldLocusToNew.get(x,x))
# Switch the prefix to 'GLYMA_'.  regex=False keeps 'Glyma.' literal: with
# regex semantics the '.' would also swallow the first digit of an
# untranslated dot-less ID ('Glyma18g...' -> 'GLYMA_8g...').
df_nodulationRelatedGene['Soybean Gene Parsed ID'] = df_nodulationRelatedGene['Soybean Gene Parsed ID'].str.replace('Glyma.', 'GLYMA_', regex=False)
# Genes from other species that are kept in the soybean-focused subset.
ls_otherSpecies = """LjNFR5
LjSYMRK
LjNIN
LjHAR1
MtVAMP721d/MtVAMP721e""".split('\n')
# Rows whose Gene Symbol contains 'Gm' or is listed in ls_otherSpecies.
df_nodulationRelatedGeneGmOnly = df_nodulationRelatedGene.pipe(lambda df:df.loc[df['Gene Symbol'].str.contains('Gm') | df.eval("`Gene Symbol` in @ls_otherSpecies")])
# df_nodulationRelatedGeneGmOnly = df_nodulationRelatedGene
df_nodulationRelatedGene
| Gene Symbol | InfectionPhenotype | Nodule Organogenesis Phenotype | FixationPhenotype | Reference | Protein class/Molecular function | Gene Name | Alternate names | Mutagen | Medicago Gene ID | Probeset | Lotus Gene ID | Soybean Gene ID | Phaseolus Gene ID | Myc Phenotype | Soybean Gene Parsed ID | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | GmACP | NaN | Nod+/- | NaN | Wang et al., 2014 | Carrier protein | ACYL CARRIER PROTEIN (ACP) | NaN | RNAi | Medtr7g080390 | Mtr.48571.1.S1_at | Lj1g3v2840250 | Glyma.18G244300 | Phvul.008G052400 | NaN | GLYMA_18G244300 |
| 1 | PvAGO5, GmAGO5 | Inf- | Nod+/-, White++ | NaN | Reyero-Saavedra et al., 2017 | Multidomain protein (RNA silencing) | ARGONAUTE 5 (AGO5) | NaN | RNAi | Medtr4g056430, Medtr4g056470 | Mtr.45521.1.S1_at | Lj0g3v0169039.1 | Glyma.12G083500 | Phvul.011G088200 | NaN | GLYMA_12G083500 |
| 2 | LjAMSH | Inf+/-, Inf* | Nod+/- | Fix* | Malolepszy et al., 2015 | Enzyme (Metalloprotease; Deubiquitinating) | ASSOCIATED MOLECULE WITH THE SH3 DOMAIN OF STA... | NaN | LORE1 | Medtr6g083940 | Mtr.21924.1.S1_at; Mtr.21925.1.S1_s_at | Lj2g3v0721190 | Glyma.07G093100 | Phvul.003G094300 | Myc+ | GLYMA_07G093100 |
| 3 | LjAMT1.1 | NaN | Nod++ | Fix+/- | Rogato et al., 2008 | Transporter (Ammonium) | AMMONIUM TRANSPORTER 1.1 (AMT1.1) | NaN | Antisense | Medtr1g045550 | Mtr.10556.1.S1_at | Lj5g3v1314550, Lj0g3v0302059 | Glyma.10G132300 | Phvul.007G231700 | NaN | GLYMA_10G132300 |
| 4 | PvANN1 | Inf+/- | Nod+/- | Fix+/- | Carrasco-castilla et al., 2018 | Membrane binding protein | ANNEXIN1 (ANN1) | NaN | RNAi | Medtr8g038210 | Mtr.14183.1.S1_at | Lj0g3v0203419 | Glyma.13G199800 | Phvul.011G209300 | NaN | GLYMA_13G199800 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 202 | MtZPT2 | NaN | Nod* | Fix- | Frugier et al., 2000 | TF (Zinc-finger motif containing) | Krüppel-like Cys-2/His-2 ZINC FINGER PROTEIN (... | NaN | Antisense | Medtr3g102980 | Mtr.37495.1.S1_at | - | Glyma.06G045400 | Phvul.009G070800 | NaN | GLYMA_06G045400 |
| 203 | MtZR1 | NaN | Nod* | Fix+/- | Hopkins et al., 2013 | Unknown protein | ZINC RIBBON protein 1 (ZR1) | NaN | RNAi | Medtr3g086740 | Msa.965.1.S1_at | - | Glyma18g01770 | Phvul.001G254500, Phvul.011G068500 | NaN | GLYMA_18G014300 |
| 204 | MtγECS | NaN | Nod+ | Fix+/- | El Msehli et al., 2011 | Enzyme (Synthetase) | γ‐GLUTAMYLCYSTEINE SYNTHETASE (γECS) | NaN | RNAi | Medtr8g098350 | Mtr.26622.1.S1_at | Lj4g3v2951270 | Glyma.05G207600 | Phvul.002G289200 | NaN | GLYMA_05G207600 |
| 205 | Gs52 | NaN | Nod+/-, Nod* | Fix* | Govindarajulu M, 2009 | Enzyme (Ecto-apyrase) | Glycine sojae 52 (GS52) | NaN | RNAi | Medtr7g085200 | - | Lj1g3v3948070 | Glyma.16G043300 | - | NaN | GLYMA_16G043300 |
| 206 | MsNADH-GOGAT | NaN | Nod++, Nod* | Fix+/- | Cordoba et al., 2003 | Enzyme (NADH-GOGAT) | GLUTAMINE SYNTHETASE (GS)/NADH‐dependent GLUTA... | NaN | Antisense | Medtr1g027020 | Mtr.42795.1.S1_at | Lj0g3v0129059 | Glyma.06G127400 | Phvul.001G076400 | NaN | GLYMA_06G127400 |
292 rows × 16 columns
import glob

# Build the path of the filtered Cell Ranger matrix of each sample.
# dir_result points at the Cell Ranger output only while the paths are built
# and is set back to the analysis directory right after.
ls_sample = ['nodule_large', 'nodule_small', 'root']
dir_result = "/public/home/liuzj/projects/singleCell/soybean/02_result/20210922/step1_cellRanger/"
ls_cellrangerH5 = [
    f"{dir_result}/{_sample}/{_sample}/outs/filtered_feature_bc_matrix.h5"
    for _sample in ls_sample
]
dir_result = "/public/home/liuzj/projects/singleCell/soybean/02_result/20210922/analysis/noduleWithRoot/"

# Read every matrix and concatenate; the sample name is stored in obs['batch'].
ls_ad = list(map(sc.read_10x_h5, ls_cellrangerH5))
ad = sc.concat(ls_ad, keys=ls_sample, label='batch', index_unique='-batch-')
# Drop genes detected in fewer than 10 cells.
sc.pp.filter_genes(ad, min_cells=10)
ad.obs.value_counts("batch")
batch nodule_large 14819 nodule_small 9686 root 7636 dtype: int64
# Doublet detection with batch_key='batch' (jpy_tools byScDblFinder); the log
# that follows reports the cell number before and after filtering.
singleCellTools.detectDoublet.byScDblFinder(ad, batch_key='batch')
2021-09-26 10:50:51.612 | INFO | jpy_tools.singleCellTools.detectDoublet:byScDblFinder:167 - start to transfer adata to R
transfer `<class 'anndata._core.anndata.AnnData'>` to R: End
2021-09-26 10:53:26.225 | INFO | jpy_tools.singleCellTools.detectDoublet:byScDblFinder:171 - start to calculate doublet score 2021-09-26 10:55:32.050 | INFO | jpy_tools.singleCellTools.detectDoublet:byScDblFinder:175 - start to intergrate result with adata
transfer `DFrame` to python: End
2021-09-26 10:55:33.833 | INFO | jpy_tools.singleCellTools.detectDoublet:byScDblFinder:188 - before filter: 32141 2021-09-26 10:55:34.100 | INFO | jpy_tools.singleCellTools.detectDoublet:byScDblFinder:192 - after filter: 27602
# Fix the display order of the three libraries.
ad.obs['batch'] = ad.obs['batch'].cat.set_categories(['nodule_small', 'nodule_large', 'root'])


def _plotQcViolin(key, upper, title):
    """Draw a 4x4 inch violin plot of ad.obs[key] per batch.

    key is the obs column, upper the upper y-limit and title the plot title.
    """
    plt.subplots(figsize=(4,4))
    sns.violinplot(data = ad.obs, x='batch', y=key)
    plt.ylim(-100, upper)
    plt.xticks([0,1,2], ['Nodule (12 dpi)', 'Nodule (21 dpi)', 'Root'], rotation=-30, ha='left')
    plt.xlabel('')
    plt.ylabel("Counts")
    plt.title(title)
    sns.despine()


_plotQcViolin('n_counts', 8100, "UMI counts")
_plotQcViolin('n_genes', 4100, "Gene counts")
singleCellTools.plotting.plotCellScatter(ad)
# Keep cells with 400 < n_genes < 4000 and 600 < n_counts < 6000 (inside
# DataFrame.eval the comparisons bind tighter than '&').  Subsetting an
# AnnData yields a view; the object is written to right afterwards, so an
# explicit copy is taken instead of relying on the implicit copy-on-modify.
ad = ad[ad.obs.eval("400 < n_genes < 4000 & 600 < n_counts < 6000")].copy()
ad.obs.value_counts("batch")
batch nodule_large 12004 nodule_small 8229 root 6479 dtype: int64
# Keep a copy of the current X in its own layer.
ad.layers['raw'] = ad.X.copy()
# Per-cell statistics from the 'raw' layer: summed values and the number of
# genes with a value above zero.
ad.obs['UMI counts'] = ad.to_df('raw').sum(1)
ad.obs['Gene counts'] = (ad.to_df('raw') > 0).astype(int).sum(1)
# Human-readable library names.
_dt = {'nodule_large':'Large Nodule', 'nodule_small':'Small Nodule', 'root':'Root'}
ad.obs['Sample'] = ad.obs['batch'].map(_dt)
# NOTE(review): 'Sample_time' is not added to ad.obs anywhere above this line
# in the visible code -- confirm it exists when running top to bottom.
ad_merged = singleCellTools.geneEnrichInfo._mergeData(ad, 'Sample_time', 'raw')
# Export a boolean (value > 0) table, transposed so that var names are rows.
(ad_merged.to_df() > 0).T.to_excel(f"{dir_result}/gene_basic_info.xlsx")
import scvi
from jpy_tools.otherTools import loadPkl, toPkl
# Select 5000 highly variable genes from the raw layer, batch-aware.
sc.pp.highly_variable_genes(ad, layer='raw', batch_key='batch', n_top_genes=5000, flavor='seurat_v3')
# Library-size normalisation followed by log1p, stored in a separate layer.
# target_sum is passed by keyword: recent scanpy releases treat it as a
# keyword argument and warn on positional use.
ad.layers['normalize_log'] = ad.layers['raw'].copy()
sc.pp.normalize_total(ad, target_sum=1e4, layer='normalize_log')
sc.pp.log1p(ad, layer='normalize_log')
# Slim object (raw layer, 'batch' obs column, 'highly_variable' var column)
# restricted to the highly variable genes, pickled for the scVI run.
ad_forScvi = singleCellTools.basic.getPartialLayersAdata(ad, 'raw', ['batch'], ['highly_variable'])
ad_forScvi = ad_forScvi[:, ad_forScvi.var['highly_variable']].copy()
toPkl(ad_forScvi, 'ad_forScvi_soybean', 'scem')
0
# run on scem
# scvi.data.setup_anndata(
# ad_forScvi,
# )
# scvi.settings.seed = 39
# scvi.settings.num_threads = 56
# model_withBatchEffect = scvi.model.SCVI(ad_forScvi)
# model_withBatchEffect.train()
# ad_forScvi.obsm['X_scvi_withBatchEffect'] = model_withBatchEffect.get_latent_representation(ad_forScvi).copy()
# scvi.data.setup_anndata(
# ad_forScvi,
# batch_key='batch'
# )
# scvi.settings.seed = 39
# scvi.settings.num_threads = 56
# model = scvi.model.SCVI(ad_forScvi)
# model.train()
# ad_forScvi.obsm['X_scvi'] = model.get_latent_representation(ad_forScvi).copy()
# Load the object holding the scVI latent spaces (training code is commented
# out above) and copy both embeddings onto ad.
# NOTE(review): the arrays are assigned positionally -- this assumes the
# loaded object has the same cells in the same order as ad; confirm.
ad_forScvi = loadPkl('ad_forScvi_soybean').copy()
ad.obsm['X_scvi_withBatchEffect'] = ad_forScvi.obsm['X_scvi_withBatchEffect']
ad.obsm['X_scvi'] = ad_forScvi.obsm['X_scvi']
# Neighbour graph on the X_scvi latent space, then UMAP and Leiden.
sc.pp.neighbors(ad, use_rep='X_scvi')
sc.tl.umap(ad)
sc.tl.leiden(ad, resolution=0.3)
# 'Cluster' is the Leiden label stored as plain strings.
ad.obs['Cluster'] = ad.obs['leiden']
ad.obs['Cluster'] = ad.obs['Cluster'].astype(str)
# One shared 15-colour palette for the three categorical annotations.
_ls_colors = [
    '#1f77b4', '#ff7f0e', '#279e68', '#ff9896', '#aa40fc',
    '#8c564b', '#e377c2', '#b5bd61', '#17becf', '#aec7e8',
    '#ffbb78', '#98df8a', '#d62728', '#c5b0d5', '#c49c94',
]
# The same list object is stored under every key, as before.
for _colorKey in ('leiden_colors', 'Cluster_colors', 'Cell type_colors'):
    ad.uns[_colorKey] = _ls_colors
# NOTE(review): the title mentions a cutoff and a black line, but this call
# only colours the UMAP by 'Cluster' -- confirm the title is intended.
sc.pl.umap(ad, color = 'Cluster', legend_loc='on data', title='Finally used cutoff\n(black line)')
# Draw the clusters with on-data labels, then draw 'Cell type' on the same axes.
# NOTE(review): 'Cell type' is not assigned to ad.obs in the visible code -- confirm.
ax = sc.pl.umap(ad, color="Cluster", legend_loc="on data", show=False)
sc.pl.umap(ad, color="Cell type", show=False, ax=ax, title="")
sns.despine(top=True, right=True)
plt.show()
sc.pl.umap(ad, color = 'Cluster')
# Called with the 'Cluster' and 'Sample' obs columns; it returns the axes
# echoed below (x label 'Cluster', y label 'Percentage').
singleCellTools.plotting.plotLabelPercentageInCluster(ad, "Cluster", "Sample")
<AxesSubplot:xlabel='Cluster', ylabel='Percentage'>
# UMAP coloured by library.
sc.pl.umap(ad, color="Sample")
# Clusters with on-data labels, with 'Cell type' drawn on the same axes.
ax = sc.pl.umap(ad, color="Cluster", legend_loc="on data", show=False)
sc.pl.umap(ad, color="Cell type", show=False, ax=ax, title="")
sns.despine(top=True, right=True)
plt.show()
# Same call as earlier in the notebook, repeated here.
singleCellTools.plotting.plotLabelPercentageInCluster(ad, "Cluster", "Sample")
<AxesSubplot:xlabel='Cluster', ylabel='Percentage'>
# Colours and category order for the 'Sample_time' annotation.
# NOTE(review): ad.obs['Sample_time'] must already exist and be categorical
# here; it is not created in the visible code -- confirm.
ad.uns['Sample_time_colors'] = ['#ff7f0e', '#1f77b4', '#2ca02c']
ad.obs['Sample_time'] = ad.obs['Sample_time'].cat.set_categories([ 'Nodule (12 dpi)','Nodule (21 dpi)', 'Root'])
# Cluster UMAP; the title is set again through pyplot to use font size 16.
ax = sc.pl.umap(ad, color="Cluster", title='Single-nucleus transcriptomes', legend_loc='on data', show=False)
plt.title('Single-nucleus transcriptomes', fontsize=16)
plt.show()
# Library UMAP with a frameless single-column legend anchored inside the axes.
ax = sc.pl.umap(ad, color="Sample_time", title='Integrated three libraries', show=False)
plt.title('Integrated three libraries', fontsize=16)
plt.legend(loc='upper left', bbox_to_anchor=(0.6, 0.3), frameon=False, ncol=1)
plt.show()
# Same plot titled 'scVI'.
ax = sc.pl.umap(ad, color="Sample_time", title='scVI', show=False)
plt.legend(loc='upper left', bbox_to_anchor=(0.6, 0.3), frameon=False, ncol=1)
<matplotlib.legend.Legend at 0x2b3a1242a400>
# UMAP coloured by 'Sample_new' with a three-column legend below the axes.
# NOTE(review): 'Sample_new' is not assigned to ad.obs in the visible code -- confirm.
ax = sc.pl.umap(ad, color="Sample_new", title='Sample', show=False)
plt.legend(loc='upper left', bbox_to_anchor=(-0.1, -0.1), frameon=False, ncol=3)
# Clusters with on-data labels, with 'Cell type' drawn on the same axes.
ax = sc.pl.umap(ad, color="Cluster", legend_loc="on data", show=False)
sc.pl.umap(ad, color="Cell type", show=False, ax=ax, title="")
sns.despine(top=True, right=True)
plt.show()
singleCellTools.plotting.plotLabelPercentageInCluster(ad, "Cluster", "Sample")
<AxesSubplot:xlabel='Cluster', ylabel='Percentage'>
import scanpy.external as sce
# Work on a copy whose X is the log-normalised layer.
ad_scanorama = ad.copy()
ad_scanorama.X = ad_scanorama.layers['normalize_log'].copy()
sc.tl.pca(ad_scanorama)
sc.pl.pca_variance_ratio(ad_scanorama, 50,)
# Integrate on the first 10 principal components only.
ad_scanorama.obsm['X_pca_10'] = ad_scanorama.obsm['X_pca'][:, :10]
sce.pp.scanorama_integrate(ad_scanorama, 'batch', basis='X_pca_10')
[[0. 0.19650018 0.01389103] [0. 0. 0.20311777] [0. 0. 0. ]] Processing datasets nodule_small <=> root Processing datasets nodule_large <=> nodule_small
# Neighbour graph and UMAP on the Scanorama-corrected representation.
# min_dist is passed by keyword instead of relying on argument position.
sc.pp.neighbors(ad_scanorama, use_rep='X_scanorama')
sc.tl.umap(ad_scanorama, min_dist=0.3)
sc.pl.umap(ad_scanorama, color='Sample_time', title='Scanorama')
sc.pl.umap(ad_scanorama, color='Cluster')
sc.pl.umap(ad_scanorama, color='leiden_R')
# Highlight only category '12-1' of leiden_R.  `groups` takes category names;
# the former tuple ('leiden', ('12-1')) mixed in a column name and relied on
# it being ignored.
sc.pl.umap(ad_scanorama, color='leiden_R', groups=['12-1'], na_color='#FFFFFF')
# Cells of cluster 12 only, coloured by leiden_R, without frame or labels.
sc.pl.umap(
    ad_scanorama[ad_scanorama.obs.eval("Cluster in ['12']")],
    color="leiden_R", show=False, palette=['#EEBFC2', '#DE1E2A'], size=12
)
sns.despine(left=True, bottom=True)
plt.title('')
plt.xlabel('')
plt.ylabel('')
Text(0, 0.5, '')
# Two-panel UMAP (library and cluster); the first panel is made the current
# axes before showing (the legend tweaks below are disabled).
axs = sc.pl.umap(ad_scanorama, color=['Sample_time', 'Cluster'], title=['Sample', 'Cluster'], wspace=0.3, show=False)
ax = axs[0]
plt.sca(ax)
# plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.07), frameon=False, ncol=3, fontsize=12)
# plt.tight_layout()
plt.show()
import scanpy.external as sce
# Work on a copy whose X is the log-normalised layer.
ad_harmony = ad.copy()
ad_harmony.X = ad_harmony.layers['normalize_log'].copy()
sc.tl.pca(ad_harmony)
sc.pl.pca_variance_ratio(ad_harmony, 50,)
# Integrate on the first 10 principal components only.
ad_harmony.obsm['X_pca_10'] = ad_harmony.obsm['X_pca'][:, :10]
sce.pp.harmony_integrate(ad_harmony, 'batch', basis='X_pca_10')
2022-10-31 14:01:49,755 - harmonypy - INFO - Iteration 1 of 10 2022-10-31 14:01:59,244 - harmonypy - INFO - Iteration 2 of 10 2022-10-31 14:02:08,679 - harmonypy - INFO - Iteration 3 of 10 2022-10-31 14:02:18,614 - harmonypy - INFO - Iteration 4 of 10 2022-10-31 14:02:28,275 - harmonypy - INFO - Iteration 5 of 10 2022-10-31 14:02:37,530 - harmonypy - INFO - Iteration 6 of 10 2022-10-31 14:02:46,733 - harmonypy - INFO - Converged after 6 iterations
# Neighbour graph and UMAP on the Harmony-corrected PCs.
# min_dist is passed by keyword instead of relying on argument position.
sc.pp.neighbors(ad_harmony, use_rep='X_pca_harmony')
sc.tl.umap(ad_harmony, min_dist=0.3)
axs = sc.pl.umap(ad_harmony, color=['Sample_time', 'Cluster'], title=['Sample', 'Cluster'], wspace=0.3)
# Highlight only category '12-1' of leiden_R.  `groups` takes category names,
# so the column name that used to be passed alongside is dropped.
sc.pl.umap(ad_harmony, color='leiden_R', groups=['12-1'], na_color='#FFFFFF')
# Cells of cluster 12 only, coloured by leiden_R.
sc.pl.umap(
    ad_harmony[ad_harmony.obs.eval("Cluster in ['12']")],
    color="leiden_R", show=False, palette=['#EEBFC2', '#DE1E2A'], size=12
)
plt.title('Cluster 12')
Text(0.5, 1.0, 'Cluster 12')
# Seurat-based integration over 'batch', restricted to the genes flagged as
# highly variable in ad.var (the R console output follows).
Seurat = importr('Seurat')
ad_cca = singleCellTools.normalize.integrateBySeurat(ad, 'batch', ad.var.loc[lambda df:df['highly_variable']].index.to_list())
R[write to console]: Warning:
R[write to console]: Feature names cannot have underscores ('_'), replacing with dashes ('-')
R[write to console]: Warning:
R[write to console]: Invalid name supplied, making object name syntactically valid. New object name is batchn_genesn_countspercent_ctleiden_0.0leiden_0.1leiden_0.2leiden_0.3leiden_0.4leiden_0.5leiden_0.6leiden_0.7leiden_0.8leiden_0.9leiden_1.0leiden_1.1leiden_1.2leiden_1.3leiden_1.4leiden_1.5leiden_1.6leiden_1.7leiden_1.8leiden_1.9leiden_2.0leidenUMI.countsGene.countsSampleleiden_RClusterCell.typeX__groupSample_twoPartitionUMI.counts.log10Sample_newwgcna_clustercluster_mergeUCSample_time; see ?make.names for more details on syntax validity
R[write to console]: Warning:
R[write to console]: Keys should be one or more alphanumeric characters followed by an underscore, setting key from scvi_withBatchEffect_ to scviwithBatchEffect_
R[write to console]: Warning:
R[write to console]: All keys should be one or more alphanumeric characters followed by an underscore '_', setting key to scviwithBatchEffect_
R[write to console]: Warning:
R[write to console]: Adding a Graph without an assay associated with it
R[write to console]: Warning:
R[write to console]: Adding a Graph without an assay associated with it
R[write to console]: Performing log-normalization
R[write to console]: 0% 10 20 30 40 50 60 70 80 90 100%
R[write to console]: [----|----|----|----|----|----|----|----|----|----|
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: |
R[write to console]: Performing log-normalization
R[write to console]: 0% 10 20 30 40 50 60 70 80 90 100%
R[write to console]: [----|----|----|----|----|----|----|----|----|----|
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: |
R[write to console]: Performing log-normalization
R[write to console]: 0% 10 20 30 40 50 60 70 80 90 100%
R[write to console]: [----|----|----|----|----|----|----|----|----|----|
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: *
R[write to console]: |
R[write to console]: Scaling features for provided objects
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=11s
R[write to console]: Finding all pairwise anchors
| | 0 % ~calculating
R[write to console]: Running CCA R[write to console]: Merging objects R[write to console]: Finding neighborhoods R[write to console]: Finding anchors R[write to console]: Found 15752 anchors R[write to console]: Filtering anchors R[write to console]: Retained 5537 anchors
|+++++++++++++++++ | 33% ~12m 38s
R[write to console]: Running CCA R[write to console]: Merging objects R[write to console]: Finding neighborhoods R[write to console]: Finding anchors R[write to console]: Found 15221 anchors R[write to console]: Filtering anchors R[write to console]: Retained 2648 anchors
|++++++++++++++++++++++++++++++++++ | 67% ~06m 35s
R[write to console]: Running CCA R[write to console]: Merging objects R[write to console]: Finding neighborhoods R[write to console]: Finding anchors R[write to console]: Found 12094 anchors R[write to console]: Filtering anchors R[write to console]: Retained 3489 anchors
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=15m 26s
R[write to console]: Merging dataset 2 into 1 R[write to console]: Extracting anchors for merged samples R[write to console]: Finding integration vectors R[write to console]: Finding integration vector weights R[write to console]: 0% 10 20 30 40 50 60 70 80 90 100% R[write to console]: [----|----|----|----|----|----|----|----|----|----| R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: | R[write to console]: Integrating data R[write to console]: Merging dataset 3 into 1 2 R[write to console]: Extracting anchors for merged samples R[write to console]: Finding integration vectors R[write to console]: Finding integration vector weights R[write to console]: 0% 10 20 30 40 50 60 70 80 90 100% R[write to console]: [----|----|----|----|----|----|----|----|----|----| R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to 
console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: * R[write to console]: | R[write to console]: Integrating data
# PCA, neighbour graph (20 PCs) and UMAP on the Seurat-integrated object.
# n_pcs and min_dist are passed by keyword instead of by position.
sc.tl.pca(ad_cca)
sc.pl.pca_variance_ratio(ad_cca, n_pcs=50)
sc.pp.neighbors(ad_cca, n_pcs=20)
sc.tl.umap(ad_cca, min_dist=0.3)
# Reuse the cluster palette of the main object.
ad_cca.uns['Cluster_colors'] = ad.uns['Cluster_colors']
axs = sc.pl.umap(ad_cca, color=['Sample_time', 'Cluster'], title=['Sample', 'Cluster'], wspace=0.3)
# Highlight only category '12-1' of leiden_R.  `groups` takes category names,
# so the column name that used to be passed alongside is dropped.
sc.pl.umap(ad_cca, color='leiden_R', groups=['12-1'], na_color='#FFFFFF')
# Cells of cluster 12 only, coloured by leiden_R.
sc.pl.umap(
    ad_cca[ad_cca.obs.eval("Cluster in ['12']")],
    color="leiden_R", show=False, palette=['#EEBFC2', '#DE1E2A'], size=12
)
plt.title('Cluster 12')
Text(0.5, 1.0, 'Cluster 12')
# Marker genes per cell type, written as symbol -> gene ID.
dt_visualizationGene = {
    "Cortex": {
        "PIN2": "GLYMA_09G117900",
        "ENOD2": "GLYMA_20G203800",
        "CA1": "GLYMA_06G182700",
    },
    "Stele": {
        # "MATE79": "GLYMA_13G339800",
        "MATE87": "GLYMA_15G274600",
        "PIN1b": "GLYMA_07G102500",
        # "PIN1d": "GLYMA_03G126000",
        "SLR": "GLYMA_03G158700",
        "NPF7.3": "GLYMA_17G153300",
    },
    # "Infection Zone": dict(
    #     LBA="GLYMA_10G199100",
    #     LBC1="GLYMA_10G199000",
    #     LBC2="GLYMA_20G191200",
    #     LBC3="GLYMA_10G198800",
    # ),
    "Infected cell": {
        "SYMREM1.1": "GLYMA_08G012800",
        "ENOD55": "GLYMA_02G204500",
        "RPG": "GLYMA_10G198700",
    },
    "Epidermis": {
        "SHV3": "GLYMA_08G324300",
        "COBL7": "GLYMA_09G039900",
        "CPC": "GLYMA_01G224900",
    },
}
# Invert each inner mapping to gene ID -> "symbol\n(gene ID)", the panel
# title used when plotting.
dt_visualizationGene = {
    _celltype: {
        _geneId: f"{_symbol}\n({_geneId})"
        for _symbol, _geneId in _dt_markers.items()
    }
    for _celltype, _dt_markers in dt_visualizationGene.items()
}
# One row of marker-gene UMAPs per cell type.  Each cell type is paired, in
# order, with a suptitle x position, a dot size and a suptitle y position.
_ls_suptitleX = [0.45, 0.45, 0.46, 0.45, 0.45]
_ls_dotSize = [5, 5, 5, 5, 10]
_ls_suptitleY = [1.3, 1.3, 1.3, 1.3, 1.3]
# Display names that differ from the dictionary key.
_dt_panelName = {'Stele': 'Vascular bundle'}

with plt.rc_context({"figure.figsize": (3, 2)}):
    for (celltype, _dt_genes), x, z, y in zip(
        dt_visualizationGene.items(), _ls_suptitleX, _ls_dotSize, _ls_suptitleY
    ):
        axs = sc.pl.umap(
            ad,
            layer="normalize_log",
            cmap="Reds",
            color=_dt_genes.keys(),
            title=_dt_genes.values(),
            size=z,
            show=False,
            ncols=4,
            hspace=0.7,
        )
        # With a single gene the call result is wrapped in a list so the
        # loop below can iterate over it.
        if len(_dt_genes) == 1:
            axs = [axs]
        # Re-apply each panel title in italics.
        for ax in axs:
            plt.sca(ax)
            plt.title(ax.get_title(), fontdict={"style": "italic"})
        plt.suptitle(
            _dt_panelName.get(celltype, celltype),
            x=x, y=y, fontsize=20, fontweight='bold',
        )
        plt.show()
# UMAPs of four further genes from the log-normalised layer, two per row.
_ls = ['GLYMA_02G098200', 'GLYMA_11G203900', 'GLYMA_05G088400', 'GLYMA_13G300600']
sc.pl.umap(
ad,
layer="normalize_log",
cmap="Reds",
color=_ls,
size=15,
ncols=2,
show=False,
)
[<AxesSubplot:title={'center':'GLYMA_02G098200'}, xlabel='UMAP1', ylabel='UMAP2'>,
<AxesSubplot:title={'center':'GLYMA_11G203900'}, xlabel='UMAP1', ylabel='UMAP2'>,
<AxesSubplot:title={'center':'GLYMA_05G088400'}, xlabel='UMAP1', ylabel='UMAP2'>,
<AxesSubplot:title={'center':'GLYMA_13G300600'}, xlabel='UMAP1', ylabel='UMAP2'>]
# Arabidopsis gene -> soybean gene lookup.  The notebook used to read
# ".../parsed/1v1/arab__v__soybase_2.1v1.tsv" first and overwrite the result
# immediately with the table below; only the table that was actually in
# effect is loaded now.
dt_arab2soybean = pd.read_table(
    "/public/home/liuzj/data/ortholog/plant/parsed/1v1_test/soybase_2__v__arab.1v1.tsv"
).set_index("arab")["soybase_2"].to_dict()

# Arabidopsis root atlas restricted to genes present in the lookup and
# renamed to the soybean IDs.  The subset is copied explicitly because
# var/obs are modified right away and a subset of an AnnData is a view.
ad_ara = sc.read_h5ad('/data/Zhaijx/liuzj/projects/singleCellRoot/bioaxivData/GSE152766_Root_Atlas.h5ad')
ad_ara = ad_ara[:, ad_ara.var.index.isin(list(dt_arab2soybean.keys()))].copy()
ad_ara.var.index = ad_ara.var.index.map(dt_arab2soybean)
ad_ara.obs['specie'] = 'arabidopsis'

# Soybean root library only, tagged with the labels used for integration.
ad_root = ad[ad.obs['batch'] == 'root'].copy()
ad_root.obs['orig.ident'] = 'soybean'
ad_root.obs['specie'] = 'soybean'
import anndata
toPkl(ad_ara, 'ad_ara', 'scem')
toPkl(ad_root, 'ad_root', 'scem')
2022-11-02 19:20:20.833 | INFO | jpy_tools.otherTools:toPkl:477 - please run `loadPkl('ad_ara', lambda **dt:sc.read_h5ad(**dt), arg_path='filename')` to get object
2022-11-02 19:20:52.617 | INFO | jpy_tools.otherTools:toPkl:477 - please run `loadPkl('ad_root', lambda **dt:sc.read_h5ad(**dt), arg_path='filename')` to get object
0 0
# ad_root = loadPkl('ad_root', lambda **dt:sc.read_h5ad(**dt), arg_path='filename').copy()
# ad_ara = loadPkl('ad_ara', lambda **dt:sc.read_h5ad(**dt), arg_path='filename').copy()
# ad_ara = ad_ara[ad_ara.obs['time.anno'].isin(['Maturation'])]
# ad_ara = ad_ara[~ad_ara.obs['celltype.anno'].isin(['Putative Quiescent Center', 'Stem Cell Niche', 'Columella', 'Lateral Root Cap'])]
# ad_integrated = singleCellTools.scvi.labelTransferByScanvi(
# ad_ara,
# "celltype.anno",
# "raw",
# ad_root,
# "raw",
# True,
# ["orig.ident", "specie"],
# max_epochs=200,
# mode="merge",
# batch_size_ref=2**11,
# dt_params2SCVIModel = {'n_layers': 4},
# dt_params2SCANVIModel = {'n_layers':4},
# hvgBatch='specie',
# n_top_genes=5000)
# toPkl(ad_root, 'ad_root', 'ipf', dir_path=dir_temp)
# toPkl(ad_integrated, 'ad_integrated', 'ipf', dir_path=dir_temp)
# Reload the pickled objects (the commented-out cell above shows the call that wrote them).
ad_root = loadPkl('ad_root', lambda **dt:sc.read_h5ad(**dt), arg_path='filename').copy()
ad_integrated = loadPkl('ad_integrated', lambda **dt:sc.read_h5ad(**dt), arg_path='filename').copy()
# Capitalised display names for the lower-case 'specie' values.
_dt = {'arabidopsis': 'Arabidopsis', 'soybean':'Soybean'}
ad_integrated.obs['Specie'] = ad_integrated.obs['specie'].map(_dt)
sc.pl.umap(ad_integrated, color='Specie', )
# Category order for the transferred labels; reorder_categories requires this
# list to contain exactly the categories present after capitalisation.
ls_celltypeOrder = [
    "Atrichoblast",
    "Cortex",
    "Endodermis",
    "Pericycle",
    "Phloem",
    "Procambium",
    "Trichoblast",
    "Xylem",
    "Unknown",
]
sr_transferred = ad_root.obs["labelTransfer_scanvi_celltype.anno"].str.capitalize()
sr_transferred = sr_transferred.astype("category")
ad_root.obs["labelTransfer_scanvi_celltype.anno"] = sr_transferred.cat.reorder_categories(
    ls_celltypeOrder
)

# Copy the tidied labels onto the integrated object, and the cluster
# assignment of `ad` onto both objects (pandas aligns on the obs index).
ad_integrated.obs['scANVI_results'] = ad_root.obs['labelTransfer_scanvi_celltype.anno']
for _adata in (ad_root, ad_integrated):
    _adata.obs['Cluster'] = ad.obs['Cluster']
# Soybean cells of the integrated object, drawn on top of all integrated cells.
# The subset is taken once and materialised with .copy(): scanpy stores the
# colour palette in .uns while plotting, which on a view produces
# "Trying to set attribute `.uns` of view, copying.".
_ad = ad_integrated[ad_integrated.obs.eval("Specie == 'Soybean'")].copy()
for _color, _title in [
    ("scANVI_results", "Cell type\n(label transfered from Arabidopsis dataset)"),
    ("Cluster", None),  # None leaves scanpy's default title in place
]:
    # Uncoloured background layer containing every integrated cell.
    ax = sc.pl.umap(ad_integrated, show=False)
    sc.pl.umap(
        _ad,
        show=False,
        ax=ax,
        title=_title,
        color=_color,
        size=12e4 / len(ad_integrated),
    )
    plt.show()
Trying to set attribute `.uns` of view, copying.
Trying to set attribute `.uns` of view, copying.
# NOTE(review): this rebinds ad_root to a pickle from another directory and so
# replaces the object that was loaded and re-categorised above -- confirm this is intended.
ad_root = loadPkl(
"ad_root",
dir_path="/public/home/liuzj/projects/singleCell/02_jupyter/soybean/2021_11_12",
)
# Background: all cells of `ad` without colour; foreground: ad_root coloured by transferred label.
ax = sc.pl.umap(ad, show=False)
sc.pl.umap(
ad_root,
color="labelTransfer_scanvi_celltype.anno",
ax=ax,
size=12e4 / len(ad),
title="Cell type\n(label transfered from Arabidopsis dataset)",
show=False
)
# plt.legend(ncol=4, frameon=False, bbox_to_anchor=(0,1))
<AxesSubplot:title={'center':'Cell type\n(label transfered from Arabidopsis dataset)'}, xlabel='UMAP1', ylabel='UMAP2'>
# Collapse the transferred labels into broader groups; labels that are not a key
# here are kept unchanged (see the .get(x, x) fallback below).
_dt = {
"Atrichoblast": "Epidermis",
"Pericycle": "Stele",
"Phloem": "Stele",
"Procambium": "Stele",
"Trichoblast": "Epidermis",
"Xylem": "Stele",
"unknown": "Unknown",
}
ad_root.obs["labelTransfer_results"] = ad_root.obs[
"labelTransfer_scanvi_celltype.anno"
].map(lambda x: _dt.get(x, x))
# ad_root.uns['labelTransfer_results_colors'] = ['#FBB03B', '#F77D0E', '#8C5552', '#B5C3E2', '#808080']
# _dt is reused from here on as the label -> colour mapping; 'Unknown' is forced to grey.
_dt = singleCellTools.basic.getadataColor(ad_root, 'labelTransfer_results')
_dt['Unknown'] = '#808080'
singleCellTools.basic.setadataColor(ad_root, 'labelTransfer_results', _dt);
# Background: all cells of `ad` without colour; foreground: ad_root coloured by merged label.
ax = sc.pl.umap(ad, show=False)
sc.pl.umap(
ad_root,
color="labelTransfer_results",
ax=ax,
size=12e4 / len(ad),
title="Cell type\n(label transfered from Arabidopsis dataset)",
show=False
)
sns.despine()
# plt.legend(ncol=4, frameon=False, bbox_to_anchor=(0,1))
# ad.obs['labelTransfer_scanvi_celltype.anno'] = ad_integrated.obs['labelTransfer_scanvi_celltype.anno']
# ad.obs['labelTransfer_scanvi_time.celltype.anno'] = ad_integratedZone.obs['labelTransfer_scanvi_time.celltype.anno']
# Copy the cluster assignment of `ad` onto ad_root (aligned on the obs index),
# then list the clusters that contain at least 100 ad_root cells.
ad_root.obs['Cluster'] = ad.obs['Cluster']
ad_root.obs['Cluster'].value_counts().loc[lambda x:x >= 100].index.to_list()
['3', '6', '5', '8', '2', '10', '4', '13']
# Restrict to clusters with at least 100 cells and plot, per cluster, the share of each transferred label.
_ls = ad_root.obs["Cluster"].value_counts().loc[lambda x: x >= 100].index.to_list()
# "@_ls" makes pandas eval read the Python variable _ls, so that name must stay in sync with the string.
_ad = ad_root[ad_root.obs.eval("Cluster in @_ls")]
singleCellTools.plotting.plotLabelPercentageInCluster(
_ad, "Cluster", "labelTransfer_results", dt_kwargsForLegend=dict(ncol=5, loc='upper left', bbox_to_anchor=(0,-0.2))
)
<AxesSubplot:xlabel='Cluster', ylabel='Percentage'>
sc.pl.umap(ad_root, color=['labelTransfer_scanvi_celltype.anno', 'Cluster'], wspace=0.5,)
import tqdm
import sklearn.metrics
# Leiden clustering on a copy of `ad` for 16 resolutions from 0 to 1.5; each result is stored as obs['leiden_<res>'].
ad_forClusteringTest = ad.copy()
for res in tqdm.tqdm(np.linspace(0, 1.5, 16)):
# Format to one decimal and convert back to float so the key reads e.g. "leiden_0.3"
# (presumably `value >> F(func)` applies func to value -- confirm against the `F` helper).
res= f"{res:.1f}" >> F(float)
sc.tl.leiden(ad_forClusteringTest, resolution=res, key_added=f"leiden_{res}")
100%|██████████| 16/16 [01:31<00:00, 5.72s/it]
# Silhouette score of every clustering on the 'X_scvi' embedding, keyed by resolution.
dt_silhouette_score = {}
for res in tqdm.tqdm(np.linspace(0, 1.5, 16)):
# Resolution 0 is skipped.
if res == 0:
continue
res= f"{res:.1f}" >> F(float)
dt_silhouette_score[res] = sklearn.metrics.silhouette_score(ad_forClusteringTest.obsm['X_scvi'], ad_forClusteringTest.obs[f"leiden_{res}"], random_state = 39)
100%|██████████| 16/16 [04:22<00:00, 16.42s/it]
# Adjusted mutual information between the transferred labels and each Leiden clustering,
# computed on the ad_root cells whose transferred label is not 'Unknown'.
ad_rootKnownByAra = ad_root[ad_root.obs.eval("labelTransfer_results != 'Unknown'")]
dt_ami = {}
for res in np.linspace(0, 1.5, 16):
res= f"{res:.1f}" >> F(float)
# Keys are the resolution as a string; the clustering is subset to the same cells by obs index.
dt_ami[res >> F(str)] = sklearn.metrics.adjusted_mutual_info_score(ad_rootKnownByAra.obs['labelTransfer_results'], ad_forClusteringTest[ad_rootKnownByAra.obs.index].obs[f"leiden_{res}"] )
# Line-and-dot plot of AMI against resolution.
(
so.Plot(x=dt_ami.keys(), y=dt_ami.values())
.add(so.Line(color='Black'))
.add(so.Dot(color='Black'))
.theme(dt_snsStyle)
.label(x='Resolution', y='Adjusted mutual information')
)
sc.pl.umap(ad_root, color='labelTransfer_results')
# NOTE(review): the leiden_* columns are written to ad_forClusteringTest in the visible code,
# not to ad_root -- confirm ad_root really carries 'leiden_0.5' / 'leiden_0.2'.
sc.pl.umap(ad_root, color='leiden_0.5')
sc.pl.umap(ad_root, color='leiden_0.2')
sc.pl.umap(ad_forClusteringTest, color='leiden_0.2', title='Cluster\n(Resolution = 0.2)')
# NOTE(review): the palette is stored under 'leiden_0.3_colors' while the plots below use
# 'leiden_0.5' -- confirm which resolution was meant.
ad_forClusteringTest.uns['leiden_0.3_colors'] = ad.uns['Cluster_colors']
sc.pl.umap(ad_root, color='leiden_0.5')
sc.pl.umap(ad_forClusteringTest, color='leiden_0.5', title='Cluster\n(Resolution = 0.5)')
sc.pl.umap(ad_forClusteringTest, color='Sample')
# Enrichment scores per 'leiden_0.5' cluster via the project helper; the result is read back
# from uns['enrichScore__cellexES'] on the next line.
singleCellTools.geneEnrichInfo.calculateEnrichScoreByCellex(ad_forClusteringTest, 'raw', 'leiden_0.5', kayAddedPrefix='enrichScore_')
# Per cluster: the five genes with the highest 'enrichScore'.
_dt = ad_forClusteringTest.uns['enrichScore__cellexES'].groupby('leiden_0.5').apply(lambda x: x.sort_values('enrichScore', ascending=False).head(5)['gene'].to_list()).to_dict()
# UMAP panels of those top genes for four selected clusters.
for c in ['1','2', '6', '9']:
sc.pl.umap(ad_forClusteringTest, color=_dt[c], ncols=1, layer='normalize_log', cmap='Reds')
import glob
# dir_result first points at the Cell Ranger output, only to build the list of .h5 paths ...
dir_result = "/public/home/liuzj/projects/singleCell/soybean/02_result/20210922/step1_cellRanger/"
ls_sample = ['nodule_large', 'nodule_small', 'root']
ls_cellrangerH5 = [f"{dir_result}/{x}/{x}/outs/filtered_feature_bc_matrix.h5" for x in ls_sample]
# ... and is then rebound to the analysis directory, which is the value later cells see.
dir_result = "/public/home/liuzj/projects/singleCell/soybean/02_result/20210922/analysis/noduleWithRoot/"
# Read the three matrices and concatenate them; obs['batch'] holds the sample key and
# cell names get the suffix '-batch-<key>'.
ls_ad = [sc.read_10x_h5(x) for x in ls_cellrangerH5]
ad_raw = sc.concat(ls_ad, label='batch', keys=ls_sample, index_unique='-batch-')
# Display names for the samples, stored as an ordered set of categories in obs['Sample'].
dt_renameSample = {
"root": "Root",
"nodule_large": "Nodule (21 dpi)",
"nodule_small": "Nodule (12 dpi)",
}
ad_raw.obs['Sample'] = ad_raw.obs['batch'].map(dt_renameSample).astype('category').cat.set_categories(['Nodule (12 dpi)', 'Nodule (21 dpi)', 'Root'])
# Drop genes detected in fewer than 10 cells, then run the project's doublet-detection and QC-scatter helpers.
sc.pp.filter_genes(ad_raw, min_cells=10)
singleCellTools.detectDoublet.byScDblFinder(ad_raw, batch_key='batch')
singleCellTools.plotting.plotCellScatter(ad_raw, batch='batch')
def _plotQcHistogram(column, upper, xlabel, size=(4, 4), dt_theme=None):
    """Draw a per-sample histogram of one ``ad_raw.obs`` column.

    One facet row is drawn per value of ``obs['Sample']``; the rows keep
    independent y axes and share the x range ``(0, upper)``.

    Parameters
    ----------
    column : str
        Column of ``ad_raw.obs`` to histogram (e.g. ``'n_genes'``).
    upper : float
        Upper end of both the histogram bin range and the x-axis limit.
    xlabel : str
        Label of the x axis.
    size : tuple, optional
        Figure size passed to ``Plot.layout``.
    dt_theme : dict, optional
        rc parameters passed to ``Plot.theme``; defaults to ``dt_snsStyle``.

    Returns
    -------
    matplotlib.figure.Figure
        The rendered figure, so the caller can add cutoff lines to its axes.
    """
    if dt_theme is None:
        dt_theme = dt_snsStyle
    return (
        so.Plot(data=ad_raw.obs)
        .facet(row='Sample')
        .add(so.Bars(), so.Hist(binrange=(0, upper)), so.Stack(), x=column, color='Sample', legend=False)
        .share(y=False)
        .limit(x=(0, upper))
        .scale(color='deep')
        .theme(dt_theme)
        .label(x=xlabel, y='Counts')
        .layout(size=size)
        .plot()
        ._figure
    )


# Genes per cell, with the cutoffs 400 / 4000 marked as dashed black lines.
fig = _plotQcHistogram('n_genes', 8000, 'Number of genes')
axs = fig.axes
for ax in axs:
    for cutoff in (400, 4000):
        ax.axvline(x=cutoff, ls='--', color='black', lw=1)
fig

# UMIs per cell, with the cutoffs 600 / 6000 marked as dashed black lines.
fig = _plotQcHistogram('n_counts', 10000, 'Number of UMIs')
axs = fig.axes
for ax in axs:
    for cutoff in (600, 6000):
        ax.axvline(x=cutoff, ls='--', color='black', lw=1)
fig
# Distribution of UMI counts per cluster.
sns.boxplot(ad.obs, x='Cluster', y='n_counts')
<AxesSubplot:xlabel='Cluster', ylabel='n_counts'>
# Distribution of detected genes per cluster.
sns.boxplot(ad.obs, x='Cluster', y='n_genes')
<AxesSubplot:xlabel='Cluster', ylabel='n_genes'>
# NOTE(review): bare expression; no module-level `dt` is defined in the visible part of the
# file, so this looks like a leftover cell -- confirm and remove.
dt
def _plotQcHistogram(column, upper, xlabel, size=(4, 4), dt_theme=None):
    """Draw a per-sample histogram of one ``ad_raw.obs`` column.

    One facet row is drawn per value of ``obs['Sample']``; the rows keep
    independent y axes and share the x range ``(0, upper)``.

    Parameters
    ----------
    column : str
        Column of ``ad_raw.obs`` to histogram (e.g. ``'n_genes'``).
    upper : float
        Upper end of both the histogram bin range and the x-axis limit.
    xlabel : str
        Label of the x axis.
    size : tuple, optional
        Figure size passed to ``Plot.layout``.
    dt_theme : dict, optional
        rc parameters passed to ``Plot.theme``; defaults to ``dt_snsStyle``.

    Returns
    -------
    matplotlib.figure.Figure
        The rendered figure, so the caller can add cutoff lines to its axes.
    """
    if dt_theme is None:
        dt_theme = dt_snsStyle
    return (
        so.Plot(data=ad_raw.obs)
        .facet(row='Sample')
        .add(so.Bars(), so.Hist(binrange=(0, upper)), so.Stack(), x=column, color='Sample', legend=False)
        .share(y=False)
        .limit(x=(0, upper))
        .scale(color='deep')
        .theme(dt_theme)
        .label(x=xlabel, y='Counts')
        .layout(size=size)
        .plot()
        ._figure
    )


# Large-format versions of the QC histograms with two cutoff pairs each:
# red = the stringent pair, black = the pair labelled "Finally used cutoffs"
# in the UMAP titles further down.
dt_largeTheme = {**dt_snsStyle, 'axes.titlesize':20, "axes.labelsize":18}

fig = _plotQcHistogram('n_genes', 8000, 'Number of genes', size=(6, 15), dt_theme=dt_largeTheme)
axs = fig.axes
for ax in axs:
    for cutoff, color in [(600, 'red'), (3000, 'red'), (400, 'black'), (4000, 'black')]:
        ax.axvline(x=cutoff, ls='--', color=color, lw=1)
fig

fig = _plotQcHistogram('n_counts', 10000, 'Number of UMIs', size=(6, 15), dt_theme=dt_largeTheme)
axs = fig.axes
for ax in axs:
    for cutoff, color in [(800, 'red'), (4000, 'red'), (600, 'black'), (6000, 'black')]:
        ax.axvline(x=cutoff, ls='--', color=color, lw=1)
fig
# Stringent QC: keep cells with 600 < n_genes < 3000 and 800 < n_counts < 4000
# (inside pandas eval, `&` binds more loosely than the comparisons).
# .copy() materialises the subset because a layer is assigned to it right
# afterwards, which should not be done on an AnnData view.
ad_qc1 = ad_raw[ad_raw.obs.eval("600 < n_genes < 3000 & 800 < n_counts < 4000")].copy()
# Number of cells per batch that pass the filter.
ad_qc1.obs.value_counts("batch")
batch nodule_large 10980 nodule_small 7293 root 5582 dtype: int64
# Keep the current X as layer 'raw' and flag 5000 highly variable genes per batch on it.
ad_qc1.layers['raw'] = ad_qc1.X.copy()
sc.pp.highly_variable_genes(ad_qc1, layer='raw', batch_key='batch', n_top_genes=5000, flavor='seurat_v3')
# Slim object for scVI built by the project helper, then restricted to the highly variable genes and pickled.
ad_qc1forScvi = singleCellTools.basic.getPartialLayersAdata(ad_qc1, 'raw', ['batch'], ['highly_variable'])
ad_qc1forScvi = ad_qc1forScvi[:, ad_qc1forScvi.var['highly_variable']].copy()
toPkl(ad_qc1forScvi, 'ad_qc1forScvi', 'scem')
2022-09-23 17:27:31.062 | INFO | jpy_tools.otherTools:toPkl:477 - please run `loadPkl('ad_qc1forScvi', lambda **dt:sc.read_h5ad(**dt), arg_path='filename')` to get object
0
# run on scem
# scvi.model.SCVI.setup_anndata(
# ad_qc1forScvi,
# batch_key='batch'
# )
# scvi.settings.seed = 39
# scvi.settings.num_threads = 24
# model = scvi.model.SCVI(ad_qc1forScvi)
# model.train()
# ad_qc1forScvi.obsm['X_scvi'] = model.get_latent_representation(ad_qc1forScvi).copy()
# toPkl(ad_qc1forScvi, 'ad_qc1forScvi', 'ipf')
# Reload the object (the commented-out cell above stores the scVI latent space in obsm['X_scvi']),
# then build the neighbour graph and UMAP on that representation.
ad_qc1forScvi = loadPkl('ad_qc1forScvi', lambda **dt:sc.read_h5ad(**dt), arg_path='filename')
sc.pp.neighbors(ad_qc1forScvi, use_rep='X_scvi')
sc.tl.umap(ad_qc1forScvi)
# Carry over the annotations of `ad`, aligned on the obs index; cells absent from `ad` get NaN.
ad_qc1forScvi.obs['Cluster'] = ad.obs['Cluster']
ad_qc1forScvi.obs['leiden_R'] = ad.obs['leiden_R']
ad_qc1forScvi
AnnData object with n_obs × n_vars = 23855 × 5000
obs: 'batch', '_scvi_batch', '_scvi_labels', 'Cluster', 'leiden_R'
var: 'highly_variable'
uns: '_scvi_manager_uuid', '_scvi_uuid', 'neighbors', 'umap'
obsm: 'X_scvi', 'X_umap'
obsp: 'distances', 'connectivities'
# UMAP of the stringently filtered cells; components '2,1' swaps the two plotted axes.
ax = sc.pl.umap(ad_qc1forScvi, color='Cluster', show=False, na_in_legend=False, components=('2,1'))
# ax.invert_yaxis()
# ax.invert_xaxis()
plt.show()
# Reuse the cluster palette of `ad` so the colours match the other figures.
ad_qc1forScvi.uns['Cluster_colors'] = ad.uns['Cluster_colors']
sc.pl.umap(ad_qc1forScvi, color='Cluster', show=False, na_in_legend=False, components=('2,1'), legend_loc='on data', title='Stringent cutoff\n(red line)')
plt.show()
# ax.invert_yaxis()
# Same colouring on `ad` for comparison.
sc.pl.umap(ad, color = 'Cluster', legend_loc='on data', title='Finally used cutoffs\n(black line)')
# Cluster '12' only, coloured by 'leiden_R', with title, axis labels and spines removed.
ax = sc.pl.umap(
ad_qc1forScvi[ad_qc1forScvi.obs.eval("Cluster in ['12']")],
color="leiden_R", show=False, palette=['#EEBFC2', '#DE1E2A'], legend_loc=None,size=12, components=('2,1')
)
sns.despine(left=True, bottom=True)
plt.title('')
plt.xlabel('')
plt.ylabel('')
plt.show()
No further QC (doublet removal only)
# Same preparation as for ad_qc1, but on all cells of ad_raw (no n_genes / n_counts filter).
ad_qc2 = ad_raw.copy()
ad_qc2.layers['raw'] = ad_qc2.X.copy()
sc.pp.highly_variable_genes(ad_qc2, layer='raw', batch_key='batch', n_top_genes=5000, flavor='seurat_v3')
ad_qc2forScvi = singleCellTools.basic.getPartialLayersAdata(ad_qc2, 'raw', ['batch'], ['highly_variable'])
ad_qc2forScvi = ad_qc2forScvi[:, ad_qc2forScvi.var['highly_variable']].copy()
toPkl(ad_qc2forScvi, 'ad_qc2forScvi', 'scem')
2022-09-23 19:17:57.778 | INFO | jpy_tools.otherTools:toPkl:477 - please run `loadPkl('ad_qc2forScvi', lambda **dt:sc.read_h5ad(**dt), arg_path='filename')` to get object
0
# run on scem
# scvi.model.SCVI.setup_anndata(
# ad_qc2forScvi,
# batch_key='batch'
# )
# scvi.settings.seed = 39
# scvi.settings.num_threads = 24
# model = scvi.model.SCVI(ad_qc2forScvi)
# model.train()
# ad_qc2forScvi.obsm['X_scvi'] = model.get_latent_representation(ad_qc2forScvi).copy()
# toPkl(ad_qc2forScvi, 'ad_qc2forScvi', 'ipf')
# Reload the object, then build the neighbour graph and UMAP on obsm['X_scvi'].
ad_qc2forScvi = loadPkl('ad_qc2forScvi', lambda **dt:sc.read_h5ad(**dt), arg_path='filename')
sc.pp.neighbors(ad_qc2forScvi, use_rep='X_scvi')
sc.tl.umap(ad_qc2forScvi)
# Carry over annotations and palette of `ad` (aligned on the obs index).
ad_qc2forScvi.obs['Cluster'] = ad.obs['Cluster']
ad_qc2forScvi.obs['leiden_R'] = ad.obs['leiden_R']
ad_qc2forScvi.uns['Cluster_colors'] = ad.uns['Cluster_colors']
# The y axis is flipped for display only.
ax = sc.pl.umap(ad_qc2forScvi, color='Cluster', legend_loc='on data', title='Doublets removing only', show=False, na_in_legend=False)
ax.invert_yaxis()
plt.show()
# Cluster '12' only, coloured by 'leiden_R', with title, axis labels and spines removed.
ax = sc.pl.umap(
ad_qc2forScvi[ad_qc2forScvi.obs.eval("Cluster in ['12']")],
color="leiden_R", show=False, palette=['#EEBFC2', '#DE1E2A'], legend_loc=None,size=12
)
sns.despine(left=True, bottom=True)
plt.title('')
plt.xlabel('')
plt.ylabel('')
ax.invert_yaxis()
# ax.invert_xaxis()
plt.show()
# Spatial object (ad_st) and single-cell object (ad_sc) used as inputs of the deconvolution cell below.
# NOTE(review): dir_temp is not defined in the visible part of the file.
ad_st = loadPkl('ad_afterTissueCut_polished', lambda **dt:sc.read_h5ad(**dt), arg_path='filename', dir_path=dir_temp)
ad_sc = sc.read_h5ad(f"{dir_result}/ad_20220311.v1.h5ad")
# Single-cell reference without the 'Root' sample.
ad_scNodule = ad_sc[ad_sc.obs.eval("Sample != 'Root'")].copy()
toPkl(ad_st, 'ad_st', 'scem')
toPkl(ad_scNodule, 'ad_scNodule', 'scem')
# scem :
# ad_st = loadPkl('ad_st', lambda **dt:sc.read_h5ad(**dt), arg_path='filename')
# ad_scNodule = loadPkl('ad_scNodule', lambda **dt:sc.read_h5ad(**dt), arg_path='filename')
# ls_keepCluster = ad_scNodule.obs['Cluster'].value_counts().pipe(lambda sr:sr[sr > 500].index.to_list())
# ad_scNoduleFiltered = ad_scNodule[ad_scNodule.obs.eval("Cluster in @ls_keepCluster")]
# singleCellTools.spatialTools.getClusterScoreFromScDataByDestvi(
# ad_st,
# ad_scNoduleFiltered,
# clusterKey="Cluster",
# nFeatures=3000,
# condScviEpoch=1000,
# destviEpoch=4000,
# minUmiCountsInStLayer = 10,
# batchSize=1024, hvgLabel='Sample',
# hvgScDataOnly=True,
# # dt_condScviConfigs={'n_layers':4},
# )
# toPkl(ad_st, 'ad_st_afterDeconvolution', 'ipf')
# Reload the spatial object that carries the deconvolution result in obsm["proportions"].
ad_st = loadPkl('ad_st_afterDeconvolution', lambda **dt:sc.read_h5ad(**dt), arg_path='filename', dir_path=dir_result)
# One spatial panel per proportions column except the last one.
with plt.rc_context({"figure.figsize": (3.5, 4)}):
    sc.pl.spatial(
        singleCellTools.plotting.obsmToObs(_ad_stSm, "proportions"),
        color=ad_st.obsm["proportions"].columns[:-1],
        size=0.2,
        cmap="Reds",
        wspace=0,
        alpha=0.2,
        ncols=6,
        show=False,
    )
    # Title corrected: it used to read "Devolution Results".
    plt.suptitle("Deconvolution Results\n(Small Nodule)", x=0.45, y=1.02, fontsize=24)
... storing 'destVI_results' as categorical ... storing 'destVI_results_merged' as categorical
# One spatial panel per proportions column except the last one (large nodule).
with plt.rc_context({"figure.figsize": (3.5, 4)}):
    sc.pl.spatial(
        singleCellTools.plotting.obsmToObs(_ad_stLg, "proportions"),
        color=ad_st.obsm["proportions"].columns[:-1],
        size=0.2,
        cmap="Reds",
        wspace=0,
        alpha=0.5,
        ncols=6,
        show=False,
    )
    # Title corrected: it used to read "Devolution Results".
    plt.suptitle("Deconvolution Results\n(Large Nodule)", x=0.45, y=1.02, fontsize=24)
... storing 'destVI_results' as categorical ... storing 'destVI_results_merged' as categorical
# 2 x 6 grid: one spatial panel per proportions column (the last column is excluded),
# showing only the spots whose proportion for that column exceeds 0.25.
ad_stAlign = sc.read_h5ad(f"{dir_result}/st_alignCross_0503.h5ad")
fig, axs = plt.subplots(2,6, figsize=(20,8))
axs = axs.reshape(-1)
ls_useCluster = ad_stAlign.obsm['proportions'].columns[:-1]
for ax, cluster in zip(axs, ls_useCluster):
# sc.pl.spatial(
# ad_align,
# size=0.075,
# ax=ax,show=False,img_key="cross"
# )
# obsmToObs presumably exposes the proportions columns through .obs so they can be
# used for filtering and colouring -- confirm against the helper.
_ad = singleCellTools.plotting.obsmToObs(ad_stAlign, "proportions")
_ad = _ad[_ad.obs[cluster] > 0.25]
sc.pl.spatial(
_ad,
color=cluster,
size=0.2,
ax=ax,
cmap='Reds',
vmin=0, vmax=1,colorbar_loc=None,show=False,img_key="cross"
)
# ax.invert_yaxis()
sns.despine(left=True, bottom=True)
plt.sca(ax)
plt.xlabel('')
plt.ylabel('')
# Hide the last grid cell.
axs[-1].set_visible(False)
plt.tight_layout()
plt.show()
# ad_stAlign.write_h5ad(f"{dir_result}/stAlignedAndCuted.h5ad")
# Cluster ID -> tissue annotation.  A trailing '*' marks some annotations in
# this table and is removed before the label is built.
_dt_clusterAnnotation = {
    "0": "Uninfected cells*",
    "1": "Inner cortex",
    "2": "Outer cortex*",
    "3": "Vascular bundle",
    "4": "Outer cortex*",
    "5": "Epidermis",
    "6": "Unknown",
    "7": "Uninfected cells*",
    "8": "Endodermis",
    "9": "Vascular bundle",
    "10": "Unknown",
    "11": "Uninfected cells*",
    "12": "Infected cells",
    "13": "Unknown",
    "14": "Unknown",
}
# Final lookup: cluster ID -> two-line panel title "<ID>\n(<tissue>)".
dt_cluster2Tissue = {
    cluster: f"{cluster}\n({tissue.rstrip('*')})"
    for cluster, tissue in _dt_clusterAnnotation.items()
}
# Single-panel version of the grid above.
# NOTE(review): `cluster` is not set here; it still holds the last value of the preceding loop.
_ad = singleCellTools.plotting.obsmToObs(ad_stAlign, "proportions")
_ad = _ad[_ad.obs[cluster] > 0.25]
# sc.pl.spatial returns a list here; its first element is the panel's Axes.
ax = sc.pl.spatial(
_ad,
color=cluster,
size=0.2,
cmap="Reds",
vmin=0,
vmax=1,
colorbar_loc=None,
show=False,
img_key="cross",
)[0]
# ax.invert_yaxis()
sns.despine(left=True, bottom=True)
plt.xlabel("")
plt.ylabel("")
# The default title is the cluster ID; replace it with the "<ID>\n(<tissue>)" label.
plt.title(dt_cluster2Tissue[ax.get_title()])
Text(0.5, 1.0, '12\n(Infected cells)')
# Switch to the other aligned object on disk and read the axis limits of the panel drawn above;
# the printed values are hard-coded as xlim / ylim in the following grid cells.
ad_stAlign = sc.read_h5ad(f"{dir_result}/stAlignedAndCuted.h5ad")
ax.get_xlim(), ax.get_ylim()
((-18.346480678304268, 617.3859695561097), (447.6898209228389, -19.078977186801858))
# 4 x 3 grid: axs[0] is reserved for the bare tissue image; axs[1:] get one panel per
# proportions column except '3' and '8' (and except the last column).
fig, axs = plt.subplots(4, 3, figsize=(14, 8))
axs = axs.reshape(-1)
ls_useCluster = ad_stAlign.obsm["proportions"].columns[:-1]
for ax, cluster in zip(axs[1:], [x for x in ls_useCluster if x not in ['3', '8']]):
# sc.pl.spatial(
# ad_align,
# size=0.075,
# ax=ax,show=False,img_key="cross"
# )
# Only spots whose proportion for this column exceeds 0.25 are drawn.
_ad = singleCellTools.plotting.obsmToObs(ad_stAlign, "proportions")
_ad = _ad[_ad.obs[cluster] > 0.25]
sc.pl.spatial(
_ad,
color=cluster,
size=0.2,
ax=ax,
cmap="Reds",
vmin=0,
vmax=1,
colorbar_loc=None,
show=False,
img_key="cross",
)
# ax.invert_yaxis()
plt.sca(ax)
plt.xlabel("")
plt.ylabel("")
# Fixed limits (copied from the single panel above) give every panel the same extent.
plt.xlim(-18.346480678304268, 617.3859695561097)
plt.ylim(447.6898209228389, -19.078977186801858)
ax.spines['top'].set_color('grey')
ax.spines['right'].set_color('grey')
ax.spines['bottom'].set_color('grey')
ax.spines['left'].set_color('grey')
plt.title(dt_cluster2Tissue[ax.get_title()])
# First cell: image without colour overlay.
# NOTE(review): `_ad` is whatever the last loop iteration left behind.
ax = axs[0]
plt.sca(ax)
sc.pl.spatial(
_ad,
ax=axs[0],
colorbar_loc=None,
show=False,
img_key="cross",
)
ax.spines['top'].set_color('grey')
ax.spines['right'].set_color('grey')
ax.spines['bottom'].set_color('grey')
ax.spines['left'].set_color('grey')
plt.xlabel("")
plt.ylabel("")
plt.xlim(-18.346480678304268, 617.3859695561097)
plt.ylim(447.6898209228389, -19.078977186801858)
# Drop the two unused grid cells at the end.
axs[-2].remove()
axs[-1].remove()
plt.tight_layout()
plt.show()
from itertools import cycle
# 2 x 5 grid of the same panels without titles; after tight_layout every axes is moved
# to x = colWidth * column index (figure fraction) while keeping its y position and size.
colWidth = 0.115
fig, axs = plt.subplots(2, 5, figsize=(17, 4))
axs = axs.reshape(-1)
ls_useCluster = ad_stAlign.obsm["proportions"].columns[:-1]
for ax, cluster in zip(axs, [x for x in ls_useCluster if x not in ['3', '8']]):
# sc.pl.spatial(
# ad_align,
# size=0.075,
# ax=ax,show=False,img_key="cross"
# )
# Only spots whose proportion for this column exceeds 0.25 are drawn.
_ad = singleCellTools.plotting.obsmToObs(ad_stAlign, "proportions")
_ad = _ad[_ad.obs[cluster] > 0.25]
sc.pl.spatial(
_ad,
color=cluster,
size=0.2,
ax=ax,
cmap="Reds",
vmin=0,
vmax=1,
colorbar_loc=None,
show=False,
img_key="cross",
)
# ax.invert_yaxis()
plt.sca(ax)
plt.xlabel("")
plt.ylabel("")
plt.xlim(-18.346480678304268, 617.3859695561097)
plt.ylim(447.6898209228389, -19.078977186801858)
ax.spines['top'].set_color('grey')
ax.spines['right'].set_color('grey')
ax.spines['bottom'].set_color('grey')
ax.spines['left'].set_color('grey')
# plt.title(dt_cluster2Tissue[ax.get_title()])
plt.tight_layout()
# cycle([0..4]) yields the column index of each axes in the flattened 2 x 5 grid.
for ax, icol in zip(axs, cycle([0,1,2,3,4])):
pos = ax.get_position()
ax.set_position([colWidth * icol, pos.y0, pos.x1 - pos.x0, pos.y1-pos.y0])
axs[-1].remove()
plt.show()
# 1 x 4 row for the columns "0", "7", "11", "12", laid out the same way as the 2 x 5 grid.
colWidth = 0.25
fig, axs = plt.subplots(1, 4, figsize=(12, 3))
axs = axs.reshape(-1)
# ls_useCluster is assigned but not used in this cell.
ls_useCluster = ad_stAlign.obsm["proportions"].columns[:-1]
for ax, cluster in zip(axs, [x for x in ["0", "7", "11", "12"]]):
# sc.pl.spatial(
# ad_align,
# size=0.075,
# ax=ax,show=False,img_key="cross"
# )
# Only spots whose proportion for this column exceeds 0.25 are drawn.
_ad = singleCellTools.plotting.obsmToObs(ad_stAlign, "proportions")
_ad = _ad[_ad.obs[cluster] > 0.25]
sc.pl.spatial(
_ad,
color=cluster,
size=0.2,
ax=ax,
cmap="Reds",
vmin=0,
vmax=1,
colorbar_loc=None,
show=False,
img_key="cross",
)
# ax.invert_yaxis()
plt.sca(ax)
plt.xlabel("")
plt.ylabel("")
plt.xlim(-18.346480678304268, 617.3859695561097)
plt.ylim(447.6898209228389, -19.078977186801858)
ax.spines['top'].set_color('grey')
ax.spines['right'].set_color('grey')
ax.spines['bottom'].set_color('grey')
ax.spines['left'].set_color('grey')
# plt.title(dt_cluster2Tissue[ax.get_title()])
plt.tight_layout()
# Move every axes to x = colWidth * column index, keeping its y position and size.
for ax, icol in zip(axs, cycle([0,1,2,3])):
pos = ax.get_position()
ax.set_position([colWidth * icol, pos.y0, pos.x1 - pos.x0, pos.y1-pos.y0])
plt.show()
# 2 x 2 panels of the proportions columns "0", "7", "11", "12" on all spots (no 0.25 threshold here).
with plt.rc_context({"figure.figsize": (3.5, 4)}):
axs = sc.pl.spatial(
singleCellTools.plotting.obsmToObs(ad_stAlign, "proportions"),
color=["0", "7", "11", "12"],
size=0.2,
cmap="Reds",
wspace=0,
hspace=0,
alpha=0.5,
ncols=2,
show=False,
colorbar_loc=None,
img_key="cross",
)
# Strip axis labels and spines, and enlarge each panel title.
for ax in axs:
plt.sca(ax)
plt.xlabel("")
plt.ylabel("")
sns.despine(left=True, bottom=True)
plt.title(ax.get_title(), fontdict=dict(size=18))
plt.suptitle(
"Signal strength (proportion) of single-nuclei\ncluster in infected zone",
x=0.5,
y=0.96,
fontsize=20,
)
plt.show()
# fig, ax = plt.subplots(figsize=(6, 3))
# UMAP of `ad` in which only the clusters listed in _ls are coloured and labelled.
_ls = ['12']
ax = sc.pl.umap(ad, color="Cluster", legend_loc="on data", show=False, na_in_legend=False, groups = _ls)
# sc.pl.umap(ad, color="Cell type", show=False, ax=ax, title="", groups=[dt_cluster2Tissue[x] for x in _ls], na_in_legend=False, legend_loc=None)
sns.despine(top=True, right=True)
# plt.legend(loc="upper left", bbox_to_anchor=(0, -0.1), ncol=2, frameon=False)
plt.title("")
plt.show()
# fig, ax = plt.subplots(figsize=(6, 3))
# Same plot for the four clusters shown in the infected-zone spatial panels.
_ls = ['0', '7', '11', '12']
ax = sc.pl.umap(ad, color="Cluster", legend_loc="on data", show=False, na_in_legend=False, groups = _ls)
# sc.pl.umap(ad, color="Cell type", show=False, ax=ax, title="", groups=[dt_cluster2Tissue[x] for x in _ls], na_in_legend=False, legend_loc=None)
sns.despine(top=True, right=True)
# plt.legend(loc="upper left", bbox_to_anchor=(0, -0.1), ncol=2, frameon=False)
plt.title("Cells located in infected zone")
plt.show()
# Spots of batch 'lg2' only.  .copy() materialises the subset because the
# following cells assign to its .obsm and .obs, which should not be done on an
# AnnData view.
ad_stLarge = ad_stAlign[ad_stAlign.obs.eval("batch == 'lg2'")].copy()
# Tissue image without colour overlay, taken from the image stored under "cross".
sc.pl.spatial(
    ad_stLarge,
    colorbar_loc=None,
    show=False,
    img_key="cross",
)
sns.despine(left=True, bottom=True)
plt.xlabel("")
plt.ylabel("")
Text(0, 0.5, '')
# Merged-group label -> columns of the proportions table that are pooled into it.
dt_mergeStPropotion = {
    "0": ["0", "12", "7", "11"],
    "1": ["1"],
    "2": ["2", "4"],
    "3": ["9"],
}


def mergePropotion(line, dt_merge=None):
    """Pool the per-cluster proportions of one row into merged groups.

    Parameters
    ----------
    line : pandas.Series
        One row of the proportions table, indexed by cluster ID.
    dt_merge : dict, optional
        Maps merged-group label -> list of cluster IDs to add up.  Defaults to
        the module-level ``dt_mergeStPropotion``.

    Returns
    -------
    pandas.Series
        One summed proportion per merged group, in mapping order, named like
        ``line`` so the rows can be re-assembled into a table.

    Raises
    ------
    KeyError
        If a listed cluster ID is missing from ``line``.
    """
    if dt_merge is None:
        dt_merge = dt_mergeStPropotion
    dt_finalPropotion = {
        mergedCluster: sum(line.at[element] for element in ls_elementCluster)
        for mergedCluster, ls_elementCluster in dt_merge.items()
    }
    return pd.Series(dt_finalPropotion).rename(line.name)
# Merge the proportions row by row; each result is a Series named after its
# spot, so concatenating them as columns and transposing yields a
# spots x merged-groups table.
ls_results = [
    mergePropotion(line) for _, line in ad_stLarge.obsm["proportions"].iterrows()
]
ad_stLarge.obsm['propotion_merged'] = pd.concat(ls_results, axis=1).T

# Label every spot with its dominant merged group, but only where that group
# holds more than half of the signal; all other spots get None.
_df_merged = ad_stLarge.obsm["propotion_merged"]
_is_dominated = _df_merged.max(1) > 0.5
ad_stLarge.obs['propotion_merged'] = np.where(_is_dominated, _df_merged.idxmax(1), None)
# Reuse the cluster colours of `ad` for the merged labels (the merged-group keys are cluster IDs).
_dt_color = singleCellTools.basic.getadataColor(ad, 'Cluster')
singleCellTools.basic.setadataColor(ad_stLarge, 'propotion_merged', _dt_color);
# Spatial plot of the dominant merged group per spot, with the legend placed below the axes.
sc.pl.spatial(
ad_stLarge,
color="propotion_merged",
title='',
size=0.12,
show=False,
img_key="cross", na_in_legend=False
)
plt.legend(ncol=1, loc='upper left', bbox_to_anchor=(0,-0.2), frameon=False)
sns.despine(left=True, bottom=True)
plt.xlabel("")
plt.ylabel("")
Text(0, 0.5, '')
# Reload the aligned object and expose the two spatial coordinates as obs columns.
ad_stAlign = sc.read_h5ad(f"{dir_result}/stAlignedAndCuted.h5ad")
ad_stAlign.obs['imagecol'] = ad_stAlign.obsm['spatial'][:, 0]
ad_stAlign.obs['imagerow'] = ad_stAlign.obsm['spatial'][:, 1]
# Split by sample and normalise each part through the project's R-backed SCT helper.
ad_stSm = ad_stAlign[ad_stAlign.obs.eval("Sample == 'Small Nodule'")]
ad_stLg = ad_stAlign[ad_stAlign.obs.eval("Sample == 'Large Nodule'")]
so_stSm = singleCellTools.normalize.normalizeBySCT_r(ad_stSm)
so_stLg = singleCellTools.normalize.normalizeBySCT_r(ad_stLg)
R[write to console]: Warning:
R[write to console]: Feature names cannot have underscores ('_'), replacing with dashes ('-')
R[write to console]: Warning:
R[write to console]: Keys should be one or more alphanumeric characters followed by an underscore, setting key from umap_scvi_total_ to umapscvitotal_
R[write to console]: Warning:
R[write to console]: All keys should be one or more alphanumeric characters followed by an underscore '_', setting key to umapscvitotal_
R[write to console]: Warning:
R[write to console]: Adding a Graph without an assay associated with it
R[write to console]: Warning:
R[write to console]: Adding a Graph without an assay associated with it
R[write to console]: vst.flavor='v2' set, setting model to use fixed slope and exclude poisson genes.
R[write to console]: Calculating cell attributes from input UMI matrix: log_umi
R[write to console]: Total Step 1 genes: 21116
R[write to console]: Total overdispersed genes: 20883
R[write to console]: Excluding 233 genes from Step 1 because they are not overdispersed.
R[write to console]: Variance stabilizing transformation of count matrix of size 21116 by 4100
R[write to console]: Model formula is y ~ log_umi
R[write to console]: Get Negative Binomial regression parameters per gene
R[write to console]: Using 2000 genes, 4100 cells
|======================================================================| 100%
R[write to console]: Setting estimate of 0 genes to inf as theta_mm/theta_mle < 1e-3 R[write to console]: # of step1 poisson genes (variance < mean): 0 R[write to console]: # of low mean genes (mean < 0.001): 0 R[write to console]: Total # of Step1 poisson genes (theta=Inf; variance < mean): 0 R[write to console]: Total # of poisson genes (theta=Inf; variance < mean): 233 R[write to console]: Calling offset model for all 233 poisson genes R[write to console]: Ignoring theta inf genes R[write to console]: Replacing fit params for 233 poisson genes by theta=Inf R[write to console]: Setting min_variance based on median UMI: 0.16 R[write to console]: Second step: Get residuals using fitted parameters for 21116 genes
|======================================================================| 100%
R[write to console]: Computing corrected count matrix for 21116 genes
|======================================================================| 100%
R[write to console]: Calculating gene attributes
R[write to console]: Wall clock passed: Time difference of 1.582168 mins
R[write to console]: Determine variable features
R[write to console]: Place corrected count matrix in counts slot
R[write to console]: Centering data matrix
|
| | 0%
|
|== | 3%
|
|===== | 7%
|
|======= | 10%
|
|========== | 14%
|
|============ | 17%
|
|============== | 21%
|
|================= | 24%
|
|=================== | 28%
|
|====================== | 31%
|
|======================== | 34%
|
|=========================== | 38%
|
|============================= | 41%
|
|=============================== | 45%
|
|================================== | 48%
|
|==================================== | 52%
|
|======================================= | 55%
|
|========================================= | 59%
|
|=========================================== | 62%
|
|============================================== | 66%
|
|================================================ | 69%
|
|=================================================== | 72%
|
|===================================================== | 76%
|
|======================================================== | 79%
|
|========================================================== | 83%
|
|============================================================ | 86%
|
|=============================================================== | 90%
|
|================================================================= | 93%
|
|==================================================================== | 97%
|
|======================================================================| 100%
R[write to console]:
R[write to console]: Set default assay to SCT
R[write to console]: Warning:
R[write to console]: Feature names cannot have underscores ('_'), replacing with dashes ('-')
R[write to console]: Warning:
R[write to console]: Keys should be one or more alphanumeric characters followed by an underscore, setting key from umap_scvi_total_ to umapscvitotal_
R[write to console]: Warning:
R[write to console]: All keys should be one or more alphanumeric characters followed by an underscore '_', setting key to umapscvitotal_
R[write to console]: Warning:
R[write to console]: Adding a Graph without an assay associated with it
R[write to console]: Warning:
R[write to console]: Adding a Graph without an assay associated with it
R[write to console]: vst.flavor='v2' set, setting model to use fixed slope and exclude poisson genes.
R[write to console]: Calculating cell attributes from input UMI matrix: log_umi
R[write to console]: Total Step 1 genes: 15833
R[write to console]: Total overdispersed genes: 15553
R[write to console]: Excluding 280 genes from Step 1 because they are not overdispersed.
R[write to console]: Variance stabilizing transformation of count matrix of size 15833 by 3574
R[write to console]: Model formula is y ~ log_umi
R[write to console]: Get Negative Binomial regression parameters per gene
R[write to console]: Using 2000 genes, 3574 cells
|======================================================================| 100%
R[write to console]: Setting estimate of 0 genes to inf as theta_mm/theta_mle < 1e-3 R[write to console]: # of step1 poisson genes (variance < mean): 0 R[write to console]: # of low mean genes (mean < 0.001): 0 R[write to console]: Total # of Step1 poisson genes (theta=Inf; variance < mean): 0 R[write to console]: Total # of poisson genes (theta=Inf; variance < mean): 280 R[write to console]: Calling offset model for all 280 poisson genes R[write to console]: Found 1 outliers - those will be ignored in fitting/regularization step R[write to console]: Ignoring theta inf genes R[write to console]: Replacing fit params for 280 poisson genes by theta=Inf R[write to console]: Setting min_variance based on median UMI: 0.04 R[write to console]: Second step: Get residuals using fitted parameters for 15833 genes
|======================================================================| 100%
R[write to console]: Computing corrected count matrix for 15833 genes
|======================================================================| 100%
R[write to console]: Calculating gene attributes R[write to console]: Wall clock passed: Time difference of 1.024059 mins R[write to console]: Determine variable features R[write to console]: Place corrected count matrix in counts slot R[write to console]: Centering data matrix | | | 0% | |=== | 5% | |====== | 9% | |========== | 14% | |============= | 18% | |================ | 23% | |=================== | 27% | |====================== | 32% | |========================= | 36% | |============================= | 41% | |================================ | 45% | |=================================== | 50% | |====================================== | 55% | |========================================= | 59% | |============================================= | 64% | |================================================ | 68% | |=================================================== | 73% | |====================================================== | 77% | |========================================================= | 82% | |============================================================ | 86% | |================================================================ | 91% | |=================================================================== | 95% | |======================================================================| 100% R[write to console]: R[write to console]: Set default assay to SCT
# Keep a copy of the current alignment object before `ad_stAlign` is rebuilt below.
ad_stBc = ad_stAlign.copy()


def _sct_feature_names(adata):
    """Return ``adata.uns["SCT_data_features"]`` with every "-" replaced by "_"."""
    return [name.replace("-", "_") for name in adata.uns["SCT_data_features"]]


# Restrict each sample to its SCT features. `.copy()` turns the sliced view into a
# real AnnData, so the layer assignments below do not write into a view (which
# would make anndata copy implicitly and emit ImplicitModificationWarning).
ad_stSm = ad_stSm[:, _sct_feature_names(ad_stSm)].copy()
ad_stLg = ad_stLg[:, _sct_feature_names(ad_stLg)].copy()

# Attach the SCT layers stored in the per-sample `so_*` containers
# (presumably dict-like, keyed by assay/slot name -- confirm against `ad2so`/`so2md`).
ad_stSm.layers["SCT_counts"] = so_stSm["SCT"].layers["SCT_counts"]
ad_stLg.layers["SCT_counts"] = so_stLg["SCT"].layers["SCT_counts"]
ad_stSm.layers["SCT_scale.data"] = so_stSm["SCT_scale.data"].layers["SCT_scale.data"]
ad_stLg.layers["SCT_scale.data"] = so_stLg["SCT_scale.data"].layers["SCT_scale.data"]

ad_stAlign = sc.concat([ad_stSm, ad_stLg], uns_merge="first")
# Number of inducing points for the sparse GP; handed to MOFA as a fraction of all observations.
n_inducing = 1000

sc.pp.highly_variable_genes(
    ad_stAlign,
    flavor="seurat_v3",
    n_top_genes=3000,
    batch_key="Sample",
    layer="raw",
)

# Options for the smooth-covariate model (sparse GP on the spatial coordinates).
_dt_smoothKwargs = {
    "sparseGP": True,
    "frac_inducing": n_inducing / ad_stAlign.n_obs,
    "start_opt": 10,
    "opt_freq": 10,
}

# NOTE(review): in the captured run below, training converges and the model is saved,
# but the call then raises AttributeError ('AnnData' object has no attribute 'mod')
# while loading the weights. The traceback shows `data.obsm["X_mofa"] = z` sits before
# the failing line, so the factors were presumably already stored -- confirm before
# relying on `X_mofa` from a run that ended with this error.
mu.tl.mofa(
    ad_stAlign,
    groups_label="Sample",
    n_factors=5,
    use_layer="SCT_scale.data",
    center_groups=False,
    smooth_covariate=["imagerow", "imagecol"],
    smooth_kwargs=_dt_smoothKwargs,
    use_float32=True,
    seed=39,
    quiet=False,
)
#########################################################
### __ __ ____ ______ ###
### | \/ |/ __ \| ____/\ _ ###
### | \ / | | | | |__ / \ _| |_ ###
### | |\/| | | | | __/ /\ \_ _| ###
### | | | | |__| | | / ____ \|_| ###
### |_| |_|\____/|_|/_/ \_\ ###
### ###
#########################################################
use_float32 set to True: replacing float64 arrays by float32 arrays to speed up computations...
Loaded view='data' group='Small Nodule' with N=3574 samples and D=3000 features...
Loaded view='data' group='Large Nodule' with N=4100 samples and D=3000 features...
Model options:
- Automatic Relevance Determination prior on the factors: True
- Automatic Relevance Determination prior on the weights: True
- Spike-and-slab prior on the factors: False
- Spike-and-slab prior on the weights: True
Likelihoods:
- View 0 (data): gaussian
Loaded 2 covariate(s) for each sample...
Smooth covariate framework is activated. This is not compatible with ARD prior on factors. Setting ard_factors to False...
##
## sparseGP set to True: using sparse Gaussian Process to speed up the training procedure
##
######################################
## Training the model with seed 39 ##
######################################
ELBO before training: -96125516.70
Iteration 1: time=13.16, ELBO=-13514712.71, deltaELBO=82610803.985 (85.94055650%), Factors=5
Iteration 2: time=13.52, ELBO=-13504152.09, deltaELBO=10560.626 (0.01098629%), Factors=5
Iteration 3: time=13.02, ELBO=-13503040.67, deltaELBO=1111.416 (0.00115621%), Factors=5
Iteration 4: time=13.22, ELBO=-13502564.59, deltaELBO=476.082 (0.00049527%), Factors=5
Iteration 5: time=14.54, ELBO=-13502929.72, deltaELBO=-365.135 (0.00037985%), Factors=5
Warning, lower bound is decreasing...
Iteration 6: time=12.83, ELBO=-13503400.84, deltaELBO=-471.115 (0.00049010%), Factors=5
Warning, lower bound is decreasing...
Iteration 7: time=11.93, ELBO=-13503772.40, deltaELBO=-371.558 (0.00038653%), Factors=5
Warning, lower bound is decreasing...
Iteration 8: time=12.35, ELBO=-13504026.68, deltaELBO=-254.286 (0.00026454%), Factors=5
Warning, lower bound is decreasing...
Iteration 9: time=11.75, ELBO=-13504222.26, deltaELBO=-195.577 (0.00020346%), Factors=5
Warning, lower bound is decreasing...
Optimising sigma node...
Iteration 10: time=5129.17, ELBO=-13431341.73, deltaELBO=72880.528 (0.07581809%), Factors=5
Iteration 11: time=27.69, ELBO=-13468024.58, deltaELBO=-36682.847 (0.03816140%), Factors=5
Warning, lower bound is decreasing...
Iteration 12: time=27.57, ELBO=-13453833.70, deltaELBO=14190.885 (0.01476287%), Factors=5
Iteration 13: time=27.48, ELBO=-13452054.91, deltaELBO=1778.791 (0.00185049%), Factors=5
Iteration 14: time=27.49, ELBO=-13451238.94, deltaELBO=815.964 (0.00084885%), Factors=5
Iteration 15: time=27.04, ELBO=-13450772.31, deltaELBO=466.634 (0.00048544%), Factors=5
Iteration 16: time=27.33, ELBO=-13450472.74, deltaELBO=299.570 (0.00031164%), Factors=5
Converged!
#######################
## Training finished ##
#######################
Saving model in /tmp/mofa_20221008-211048.hdf5...
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) /home/liuzj-lab/jupyter/sc_soybean_notebook/02.with_spatial.ipynb Cell 56 in <cell line: 1>() ----> <a href='vscode-notebook-cell://wsl%2Bubuntu/home/liuzj-lab/jupyter/sc_soybean_notebook/02.with_spatial.ipynb#Y225sdnNjb2RlLXJlbW90ZQ%3D%3D?line=0'>1</a> mu.tl.mofa(ad_stAlign, groups_label='Sample', n_factors=5, <a href='vscode-notebook-cell://wsl%2Bubuntu/home/liuzj-lab/jupyter/sc_soybean_notebook/02.with_spatial.ipynb#Y225sdnNjb2RlLXJlbW90ZQ%3D%3D?line=1'>2</a> use_layer='SCT_scale.data', <a href='vscode-notebook-cell://wsl%2Bubuntu/home/liuzj-lab/jupyter/sc_soybean_notebook/02.with_spatial.ipynb#Y225sdnNjb2RlLXJlbW90ZQ%3D%3D?line=2'>3</a> center_groups=False, <a href='vscode-notebook-cell://wsl%2Bubuntu/home/liuzj-lab/jupyter/sc_soybean_notebook/02.with_spatial.ipynb#Y225sdnNjb2RlLXJlbW90ZQ%3D%3D?line=3'>4</a> smooth_covariate=["imagerow", "imagecol"], <a href='vscode-notebook-cell://wsl%2Bubuntu/home/liuzj-lab/jupyter/sc_soybean_notebook/02.with_spatial.ipynb#Y225sdnNjb2RlLXJlbW90ZQ%3D%3D?line=4'>5</a> smooth_kwargs={ <a href='vscode-notebook-cell://wsl%2Bubuntu/home/liuzj-lab/jupyter/sc_soybean_notebook/02.with_spatial.ipynb#Y225sdnNjb2RlLXJlbW90ZQ%3D%3D?line=5'>6</a> "sparseGP": True, "frac_inducing": n_inducing/ad_stAlign.n_obs, <a href='vscode-notebook-cell://wsl%2Bubuntu/home/liuzj-lab/jupyter/sc_soybean_notebook/02.with_spatial.ipynb#Y225sdnNjb2RlLXJlbW90ZQ%3D%3D?line=6'>7</a> "start_opt": 10, "opt_freq": 10, <a href='vscode-notebook-cell://wsl%2Bubuntu/home/liuzj-lab/jupyter/sc_soybean_notebook/02.with_spatial.ipynb#Y225sdnNjb2RlLXJlbW90ZQ%3D%3D?line=7'>8</a> }, <a href='vscode-notebook-cell://wsl%2Bubuntu/home/liuzj-lab/jupyter/sc_soybean_notebook/02.with_spatial.ipynb#Y225sdnNjb2RlLXJlbW90ZQ%3D%3D?line=8'>9</a> use_float32=True, seed=39, <a 
href='vscode-notebook-cell://wsl%2Bubuntu/home/liuzj-lab/jupyter/sc_soybean_notebook/02.with_spatial.ipynb#Y225sdnNjb2RlLXJlbW90ZQ%3D%3D?line=9'>10</a> quiet=False) File ~/softwares/anaconda3/envs/sc_py/lib/python3.8/site-packages/muon/_core/tools.py:609, in mofa(data, groups_label, use_raw, use_layer, use_var, use_obs, likelihoods, n_factors, scale_views, scale_groups, center_groups, ard_weights, ard_factors, spikeslab_weights, spikeslab_factors, n_iterations, convergence_mode, use_float32, gpu_mode, svi_mode, svi_batch_size, svi_learning_rate, svi_forgetting_rate, svi_start_stochastic, smooth_covariate, smooth_warping, smooth_kwargs, save_parameters, save_data, save_metadata, seed, outfile, expectations, save_interrupted, verbose, quiet, copy) 606 data.obsm["X_mofa"] = z 608 # Weights --> 609 w = np.concatenate([f["expectations"]["W"][m][:, :] for m in data.mod], axis=1).T 610 if use_var: 611 # Set the weights of features that were not used to zero 612 data.varm["LFs"] = np.zeros(shape=(data.n_vars, w.shape[1])) AttributeError: 'AnnData' object has no attribute 'mod'
# Reload the saved MEFISTO result and cluster the spots on the MOFA factors.
ad_stAlign = loadPkl(
    'ad_stAlign_20221008_mefisto',
    lambda **dt: sc.read_h5ad(**dt),
    arg_path='filename',
    dir_path=dir_result,
)
sc.pp.neighbors(ad_stAlign, use_rep='X_mofa', n_neighbors=15)

# `min_dist` and `resolution` are passed by keyword: the bare positional numbers were
# easy to misread, and scanpy treats everything after `adata` as keyword arguments.
sc.tl.umap(ad_stAlign, min_dist=0.3)
sc.pl.umap(ad_stAlign, color='Sample')
sc.tl.leiden(ad_stAlign, resolution=0.2)
sc.pl.umap(ad_stAlign, color='leiden')
# Spatial map of the Leiden clusters, drawn without axis spines or labels.
fig, ax = plt.subplots(figsize=(10, 10))
_dt_spatialKwargs = dict(
    color="leiden",
    title='',
    size=0.12,
    show=False,
    img_key="cross",
    na_in_legend=False,
    ax=ax,
)  # alpha=0.75 was tried and left disabled
sc.pl.spatial(ad_stAlign, **_dt_spatialKwargs)
# plt.legend(ncol=1, loc='upper left', bbox_to_anchor=(0,-0.2), frameon=False)
sns.despine(left=True, bottom=True)
plt.xlabel("")
plt.ylabel("")
Text(0, 0.5, '')
# Tissue name per stereo-seq Leiden cluster, in cluster order (cluster id = list position).
_ls_stTissue = [
    "Infected Zone",
    "Inner Cortex",
    "Outer Cortex",
    "Epidermis",
    "Outer Cortex",
    "Vascular Bundle",
]
# Labels take the form "<cluster>: <tissue>" so that the two Outer Cortex clusters stay distinct.
dt_stAnno = {str(i): f"{i}: {tissue}" for i, tissue in enumerate(_ls_stTissue)}
ad_stAlign.obs["annotation"] = ad_stAlign.obs["leiden"].map(dt_stAnno)
# Spatial map coloured by tissue annotation, with the legend below the image; saved as PNG.
fig, ax = plt.subplots(figsize=(8, 6))
sc.pl.spatial(
    ad_stAlign,
    color="annotation",
    title='',
    size=0.15,
    show=False,
    img_key="cross",
    na_in_legend=False,
    ax=ax,
)  # alpha=0.75 was tried and left disabled
_dt_legendKwargs = dict(
    ncol=3,
    loc='upper center',
    bbox_to_anchor=(0.5, -0.05),
    frameon=False,
    fontsize=14,
)
plt.legend(**_dt_legendKwargs)
plt.title("stereo-seq", fontsize=18)
plt.savefig(
    "/public/home/liuzj/share/scSoybean/allSpatial_sample/leiden.png",
    dpi=300,
    bbox_inches='tight',
)
# One panel per annotation label; each panel highlights only that label's spots.
fig, axs = plt.subplots(2, 3, figsize=(12, 8))
axs = axs.ravel()
for ax, ct in zip(axs, dt_stAnno.values()):
    _dt_panelKwargs = dict(
        color="annotation",
        title=ct,
        groups=[ct],
        size=0.2,
        show=False,
        img_key="cross",
        legend_loc=None,
        na_in_legend=False,
        ax=ax,
    )  # alpha=0.75 was tried and left disabled
    sc.pl.spatial(ad_stAlign, **_dt_panelKwargs)
# toPkl(ad_stAlign, 'ad_stAlign_20221013_mefisto_anno', 'ipf', dir_path=dir_result)
2022-10-13 16:30:48.102 | INFO | jpy_tools.otherTools:toPkl:477 - please run `loadPkl('ad_stAlign_20221013_mefisto_anno', lambda **dt:sc.read_h5ad(**dt), arg_path='filename')` to get object
# Reload the annotated object, score gene enrichment per Leiden cluster, and export
# the cluster-specific genes with gene symbols attached.
ad_stAlign = loadPkl(
    'ad_stAlign_20221013_mefisto_anno',
    lambda **dt: sc.read_h5ad(**dt),
    arg_path='filename',
    dir_path=dir_result,
)
singleCellTools.geneEnrichInfo.calculateEnrichScoreByCellex(ad_stAlign, 'raw', 'leiden')

# The same filter feeds both the per-cluster gene lists and the exported table.
_str_specFilter = "enrichScore > 0.75 & expressed_ratio / expressed_ratio_others > 2"
df_stSpecGenes = ad_stAlign.uns['leiden_cellexES'].query(_str_specFilter)
dt_stSpecGenes = df_stSpecGenes.groupby('leiden')['gene'].agg(list).to_dict()

# Left-join the symbol table on gene id and blank out missing annotations before writing.
_df_stSpecAnnotated = df_stSpecGenes.merge(
    df_symbol, how="left", left_on="gene", right_index=True
)
_df_stSpecAnnotated = _df_stSpecAnnotated.eval(
    "Symbol = Symbol.fillna('') \n other_designations = other_designations.fillna('')",
    engine="python",
)
_df_stSpecAnnotated.to_excel(f"{dir_result}/stereoseq_spec_genes.xlsx")
# Example marker genes: one whitespace-separated line per tissue label listed below.
_str = """GLYMA_05G244700 GLYMA_05G151300 GLYMA_11G185200 GLYMA_13G024500 GLYMA_18G036300
GLYMA_06G182700 GLYMA_20G203800
GLYMA_03G185900 GLYMA_09G093000 GLYMA_20G241600
GLYMA_05G220500 GLYMA_09G092700 GLYMA_12G217300 GLYMA_17G139700 GLYMA_17G019300
GLYMA_02G160500 GLYMA_02G003700 GLYMA_02G135100 GLYMA_06G310700"""
_ls = [line.split() for line in _str.split('\n')]
# "3: Epidermis" has no example genes and is therefore absent from this list.
_ls_stMarkerLabels = [
    "0: Infected Zone",
    "1: Inner Cortex",
    "2: Outer Cortex",
    "4: Outer Cortex",
    "5: Vascular Bundle",
]
dt_stMarkerGeneExample = dict(zip(_ls_stMarkerLabels, _ls))
# Dot plot of the example markers across stereo-seq annotations.
axs = sc.pl.dotplot(
    ad_stAlign,
    dt_stMarkerGeneExample,
    groupby='annotation',
    cmap='Reds',
    layer='normalize_log',
    standard_scale='var',
    dot_max=0.25,
    figsize=(12, 2.5),
    show=False,
)
ax = axs['gene_group_ax']
plt.sca(ax)
# Tilt the gene-group labels so neighbouring tissue names do not overlap.
for text in ax.texts:
    text.set_rotation(30)
    text.set_ha('left')
# snRNA-seq clusters to show, in display order
# (the full order was ['12','0','7','11','1','4','2','3','9','5','6','8','10','13','14']).
_ls = ['12', '0', '7', '11', '1', '4', '2', '3', '9', '5']
_ad = ad[ad.obs.eval("Cluster in @_ls")].copy()
# Re-declare the categories so the dot plot rows follow `_ls` order.
_ad.obs['Cluster'] = _ad.obs['Cluster'].cat.set_categories(_ls)
sc.pl.dotplot(
    _ad,
    dt_stMarkerGeneExample,
    groupby='Cluster',
    cmap='Reds',
    layer='normalize_log',
    standard_scale='var',
    dot_max=0.5,
    figsize=(12, 4),
)
# For each tissue label, show its example markers on the snRNA-seq UMAP and then
# on the stereo-seq section, using the same colour settings for both.
for ct, ls_genes in dt_stMarkerGeneExample.items():
    _dt_exprKwargs = dict(
        color=ls_genes, layer='normalize_log', cmap='Reds', ncols=5, show=False
    )
    ax = sc.pl.umap(ad, **_dt_exprKwargs)
    plt.suptitle(ct, x=0.45, y=1.02, fontsize=14)
    plt.show()

    sc.pl.spatial(
        ad_stAlign,
        size=0.15,
        img_key="cross",
        na_in_legend=False,
        **_dt_exprKwargs,
    )  # alpha=0.75 was tried and left disabled
    plt.suptitle(ct, x=0.45, y=1.02, fontsize=14)
    plt.show()
# Tissue assignment for each snRNA-seq cluster.
# Labels containing "*" or "Unknown" are excluded by the plotting cells that select
# clusters through `"*" not in x` / `"Unknown" not in x`.
# An intermediate version of this mapping (clusters 0/7/11 labelled
# "Uninfected cells*") was assigned and then overwritten immediately by this one,
# so it had no effect and has been removed together with the commented-out draft.
_dt_tissueByCluster = {
    "0": "Cells in infected zone*",
    "1": "Inner cortex",
    "2": "Outer cortex*",
    "3": "Vascular bundle",
    "4": "Outer cortex*",
    "5": "Epidermis",
    "6": "Unknown",
    "7": "Cells in infected zone*",
    "8": "Unknown",
    "9": "Vascular bundle",
    "10": "Unknown",
    "11": "Cells in infected zone*",
    "12": "Infected cells",
    "13": "Unknown",
    "14": "Unknown",
}
# Prefix every label with its cluster id ("12: Infected cells") so labels stay unique.
dt_cluster2Tissue = {x: f"{x}: {y}" for x, y in _dt_tissueByCluster.items()}
ad.obs['Cell type'] = ad.obs['Cluster'].map(dt_cluster2Tissue)
%config InlineBackend.figure_format = 'png'
# UMAP with cluster numbers drawn on the data and a cell-type legend underneath.
# Clusters labelled "Unknown" are left out of the highlighted groups.
_ls_unknownCluster = ['6', '8', '10', '13', '14']
_ls = [str(i) for i in range(15) if str(i) not in _ls_unknownCluster]
ax = sc.pl.umap(
    ad, color="Cluster", legend_loc="on data", show=False, na_in_legend=False
)
sc.pl.umap(
    ad,
    color="Cell type",
    show=False,
    ax=ax,
    title="",
    groups=[dt_cluster2Tissue[x] for x in _ls],
    na_in_legend=False,
)
plt.legend(loc="upper left", bbox_to_anchor=(-0.35, -0.1), ncol=3, frameon=False)
# plt.title("Single-nucleus transcriptomes")
sns.despine()
plt.show()
def _last_word(label):
    """Return the text after the last space in `label` (the whole label if there is none)."""
    return label.split(' ')[-1]


ad.obs['Sample_two'] = ad.obs['Sample'].map(_last_word)
# Display the colour assigned to each sample.
singleCellTools.basic.getadataColor(ad, 'Sample')
{'Large Nodule': '#1f77b4', 'Small Nodule': '#ff7f0e', 'Root': '#2ca02c'}
# Cluster ids whose label contains neither "*" nor "Unknown".
_ls = [
    label.split(":")[0]
    for label in ad.obs["Cell type"].unique()
    if "*" not in label and "Unknown" not in label
]
# fig, ax = plt.subplots(figsize=(6, 3))
ax = sc.pl.umap(
    ad, color="Cluster", legend_loc="on data", show=False, na_in_legend=False
)
sc.pl.umap(
    ad,
    color="Cell type",
    show=False,
    ax=ax,
    title="",
    groups=[dt_cluster2Tissue[x] for x in _ls],
    na_in_legend=False,
)
sns.despine(top=True, right=True)
plt.legend(loc="upper left", bbox_to_anchor=(0, -0.1), ncol=2, frameon=False)
plt.title("Single-nuclei transcriptomes", fontsize=16)
plt.show()
# Cluster ids whose label contains neither "*" nor "Unknown".
_ls = [
    label.split(":")[0]
    for label in ad.obs["Cell type"].unique()
    if "*" not in label and "Unknown" not in label
]
_dt_legendKwargs = {"ncol": 3, "bbox_to_anchor": [0.5, -0.15], 'loc': 'upper center'}

# Sample_time composition of the clusters selected above ...
fig, ax = plt.subplots(figsize=(2.5, 3))
singleCellTools.plotting.plotLabelPercentageInCluster(
    ad[ad.obs.eval("Cluster in @_ls")],
    "Cluster",
    "Sample_time",
    dt_kwargsForLegend=dict(_dt_legendKwargs),
)
plt.show()

# ... and of all remaining clusters.
fig, ax = plt.subplots(figsize=(5, 3))
singleCellTools.plotting.plotLabelPercentageInCluster(
    ad[ad.obs.eval("Cluster not in @_ls")],
    "Cluster",
    "Sample_time",
    dt_kwargsForLegend=dict(_dt_legendKwargs),
)
<AxesSubplot:xlabel='Cluster', ylabel='Percentage'>
# Re-import the plotting helpers so edits to the module take effect without a kernel restart.
import importlib
importlib.reload(singleCellTools.plotting)
<module 'jpy_tools.singleCellTools.plotting' from '/public/home/liuzj/softwares/anaconda3/lib/python3.8/site-packages/jpy_tools/singleCellTools/plotting.py'>
# Display the per-cell `Sample_time` labels used by the percentage plots.
ad.obs['Sample_time']
AAACCCAAGACGCAGT-1-batch-nodule_large Nodule (21 dpi)
AAACCCAAGAGGATCC-1-batch-nodule_large Nodule (21 dpi)
AAACCCACAAATACAG-1-batch-nodule_large Nodule (21 dpi)
AAACCCACAGCAGTAG-1-batch-nodule_large Nodule (21 dpi)
AAACCCACAGCTGTAT-1-batch-nodule_large Nodule (21 dpi)
...
TTTGTTGGTGTTACAC-1-batch-root Root
TTTGTTGTCAGTCCGG-1-batch-root Root
TTTGTTGTCCTCTTTC-1-batch-root Root
TTTGTTGTCGGTTGTA-1-batch-root Root
TTTGTTGTCTCATTTG-1-batch-root Root
Name: Sample_time, Length: 26712, dtype: category
Categories (3, object): ['Nodule (12 dpi)', 'Nodule (21 dpi)', 'Root']
# Re-import the plotting helpers so edits to the module take effect without a kernel restart.
import importlib
importlib.reload(singleCellTools.plotting)
<module 'jpy_tools.singleCellTools.plotting' from '/public/home/liuzj/softwares/anaconda3/lib/python3.8/site-packages/jpy_tools/singleCellTools/plotting.py'>
# Sample_time composition of clusters 0, 7 and 11, restricted to the two nodule samples.
_ls = ["0", "7", "11"]
_dt_sampleTimeColor = {"Nodule (12 dpi)": "#ff7f0e", "Nodule (21 dpi)": "#1f77b4"}
_dt_legendKwargs = {
    "bbox_to_anchor": [0.5, -0.2],
    "loc": "upper center",
    "ncol": 1,
    "fontsize": 10,
}
with plt.rc_context({"figure.figsize": (1, 3)}):
    _ad_noduleSubset = ad[
        ad.obs.eval("Sample in ['Large Nodule', 'Small Nodule'] & Cluster in @_ls")
    ]
    singleCellTools.plotting.plotLabelPercentageInCluster(
        _ad_noduleSubset,
        "Cluster",
        "Sample_time",
        labelColor=_dt_sampleTimeColor,
        dt_kwargsForLegend=_dt_legendKwargs,
    )
# Score gene enrichment per cell type, then derive an ordered `Cluster` column from
# the "<cluster>: <tissue>" labels.
singleCellTools.geneEnrichInfo.calculateEnrichScoreByCellex(ad, 'raw', 'Cell type')

_ls_clusterOrder = [str(i) for i in range(15)]
df_marker = ad.uns['Cell type_cellexES'].assign(
    Cluster=lambda df: df["Cell type"].str.split(":").str[0]
)
_sr_cluster = df_marker["Cluster"].astype("category")
df_marker["Cluster"] = _sr_cluster.cat.set_categories(_ls_clusterOrder)

# Disabled export of the enriched genes (kept for reference):
# df_marker.drop(columns=["Cell type"]).query(
#     "enrichScore > 0.75 & expressed_ratio > 0.1 & expressed_ratio / expressed_ratio_others > 2"
# ).merge(df_symbol, how="left", left_on="gene", right_index=True).eval(
#     "Symbol = Symbol.fillna('') \n other_designations = other_designations.fillna('')",
#     engine="python",
# ).to_excel(f"{dir_result}/20220607_enrichGene.xlsx")

# Display the rows passing the enrichment filter.
ad.uns['Cell type_cellexES'].query(
    "enrichScore > 0.75 & expressed_ratio > 0.1 & expressed_ratio / expressed_ratio_others > 2"
)
| gene | Cell type | enrichScore | expressed_ratio | expressed_ratio_others | |
|---|---|---|---|---|---|
| 69 | GLYMA_01G008000 | 0: Cortex | 0.818345 | 0.111614 | 0.028928 |
| 83 | GLYMA_01G009600 | 0: Cortex | 0.811713 | 0.526208 | 0.197464 |
| 167 | GLYMA_01G019700 | 0: Cortex | 0.802225 | 0.364645 | 0.147984 |
| 224 | GLYMA_01G027200 | 0: Cortex | 0.783761 | 0.193011 | 0.064997 |
| 237 | GLYMA_01G028600 | 0: Cortex | 0.944330 | 0.110997 | 0.016844 |
| ... | ... | ... | ... | ... | ... |
| 589547 | GLYMA_20G184900 | 9: Stele | 0.800815 | 0.100629 | 0.008696 |
| 589627 | GLYMA_20G195100 | 9: Stele | 0.753702 | 0.406709 | 0.072599 |
| 589686 | GLYMA_20G203100 | 9: Stele | 0.868116 | 0.585954 | 0.031447 |
| 589713 | GLYMA_20G206900 | 9: Stele | 0.884120 | 0.606918 | 0.028496 |
| 589769 | GLYMA_20G214200 | 9: Stele | 0.948621 | 0.220126 | 0.005202 |
3626 rows × 5 columns
# Display the rows passing the stricter filter: expressed_ratio > 0.2 and expressed_ratio_others < 0.01.
df_marker.drop(columns=["Cell type"]).query("expressed_ratio > 0.2 & expressed_ratio_others < 0.01")
| gene | enrichScore | expressed_ratio | expressed_ratio_others | Cluster | |
|---|---|---|---|---|---|
| 45735 | GLYMA_04G079600 | 0.951995 | 0.213731 | 0.005821 | 10 |
| 58903 | GLYMA_10G179400 | 0.952461 | 0.321244 | 0.009483 | 10 |
| 61374 | GLYMA_11G200300 | 0.917594 | 0.310881 | 0.009715 | 10 |
| 83285 | GLYMA_03G113200 | 0.961702 | 0.207705 | 0.004404 | 11 |
| 91803 | GLYMA_07G195300 | 0.971626 | 0.232831 | 0.009420 | 11 |
| ... | ... | ... | ... | ... | ... |
| 586559 | GLYMA_19G015800 | 0.915971 | 0.250524 | 0.007338 | 9 |
| 588247 | GLYMA_19G258700 | 0.943305 | 0.210692 | 0.004736 | 9 |
| 589098 | GLYMA_20G126500 | 0.930998 | 0.250524 | 0.007648 | 9 |
| 589543 | GLYMA_20G184300 | 0.930953 | 0.292453 | 0.009589 | 9 |
| 589769 | GLYMA_20G214200 | 0.948621 | 0.220126 | 0.005202 | 9 |
173 rows × 5 columns
def _genes_by_cluster(str_filter):
    """Filter `df_marker` with a `DataFrame.query` expression and list the genes per cluster."""
    return df_marker.query(str_filter).groupby("Cluster")["gene"].agg(list).to_dict()


# Enriched genes (enrichScore-based) and the stricter "specific" gene set, per cluster.
dt_marker = _genes_by_cluster(
    "enrichScore > 0.75 & expressed_ratio > 0.1 & expressed_ratio / expressed_ratio_others > 2"
)
dt_spercificGenes = _genes_by_cluster(
    "expressed_ratio > 0.2 &expressed_ratio_others < 0.01"
)

# Cluster 12 is the one labelled "Infected cells" in `dt_cluster2Tissue`.
ls_c12EnrichedGenes = dt_marker['12']
ls_c12SpercificGenes = dt_spercificGenes['12']
# Percentage of each cluster's marker genes that are known SNF genes, as a bar plot.
_ls_cluster = sorted(dt_marker, key=int)
# Set lookup avoids rescanning the known-gene list for every marker gene.
_set_knownSnf = set(ls_knownSnfGenes)


def _known_snf_percentage(ls_genes):
    """Return the percentage of `ls_genes` found among the known SNF genes.

    An empty gene list yields 0.0 rather than raising ZeroDivisionError; clusters
    without any marker gene can occur in `dt_marker`.
    """
    if not ls_genes:
        return 0.0
    return sum(gene in _set_knownSnf for gene in ls_genes) / len(ls_genes) * 100


_ls_snfPropotion = [_known_snf_percentage(dt_marker[x]) for x in _ls_cluster]

fig, ax = plt.subplots(figsize=(6, 3))
# sns.barplot(y=["1", "2", "3"], x=[100, 100, 100], palette=["#D3D3D3"])
sns.barplot(
    x=_ls_cluster,
    y=_ls_snfPropotion,
    palette=["black"],
)
plt.ylabel("Percentage of known SNF genes")
plt.ylim(0, 12)
ax.yaxis.set_major_locator(ticker.MultipleLocator(5))
sns.despine()
# The earlier rotation=-90 call was overridden by a later rotation=0 call; only the
# effective settings are kept.
plt.xticks(rotation=0, ha='center');
def _count_known_snf(ls_genes):
    """Return how many entries of `ls_genes` appear in `ls_knownSnfGenes`."""
    return len([gene for gene in ls_genes if gene in ls_knownSnfGenes])


# (n enriched, n specific, n enriched & known SNF, n specific & known SNF) for cluster 12.
(
    len(ls_c12EnrichedGenes),
    len(ls_c12SpercificGenes),
    _count_known_snf(ls_c12EnrichedGenes),
    _count_known_snf(ls_c12SpercificGenes),
)
(311, 33, 28, 2)
# Heatmap of the marker genes per cluster; clusters without markers are skipped.
_dt_nonEmptyMarker = {cluster: genes for cluster, genes in dt_marker.items() if genes}
sc.pl.heatmap(
    ad,
    _dt_nonEmptyMarker,
    "Cluster",
    layer="normalize_log",
    cmap="Reds",
    figsize=(10, 15),
    standard_scale='var',
)
WARNING: Gene labels are not shown when more than 50 genes are visualized. To show gene labels set `show_gene_labels=True`
# Number of marker genes per cluster (clusters without markers omitted).
{cluster: len(genes) for cluster, genes in dt_marker.items() if genes}
{'10': 3, '11': 6, '12': 33, '14': 50, '5': 3, '6': 2, '8': 7, '9': 28}
# NOTE(review): presumably the 2 known SNF genes among the 33 cluster-12 genes
# reported by the count cells above -- confirm; the numbers are typed in by hand.
2 / 33
0.06060606060606061
# Known SNF genes among each cluster's marker genes (clusters without markers omitted).
{
    cluster: [gene for gene in genes if gene in ls_knownSnfGenes]
    for cluster, genes in dt_marker.items()
    if genes
}
{'5': [],
'6': [],
'8': [],
'9': [],
'10': [],
'11': [],
'12': ['GLYMA_15G048400', 'GLYMA_07G025800'],
'14': []}
# Share of known SNF genes among each cluster's marker genes, with the genome-wide
# share drawn as a dashed reference line.
# The values are scaled to percent so they match the "Percentage" axis label;
# previously fractions in [0, 1] were plotted under that label.
_set_knownSnf = set(ls_knownSnfGenes)
_df = pd.Series(
    {
        x: sum(z in _set_knownSnf for z in y) / len(y) * 100
        for x, y in dt_marker.items()
        if y  # skip clusters without marker genes (also avoids division by zero)
    }
)
sns.barplot(
    data=_df.reset_index(),
    x='index',
    y=0,
    palette=singleCellTools.basic.getadataColor(ad, 'Cluster'),
)
plt.ylabel('Percentage')
plt.xlabel('')
# Reference line: known SNF genes as a percentage of all genes in `ad`.
plt.axhline(len(ls_knownSnfGenes) / ad.shape[1] * 100, ls='--', color='black')
plt.title('Propotion of known SNF genes')
Text(0.5, 1.0, 'Propotion of known SNF genes')
# Keep only marker genes present in the stereo-seq data (dropping clusters without
# markers), then score each cluster's gene set on the spots with AUCell.
_idx_stGenes = ad_stAlign.var.index
_dt_marker = {
    cluster: [gene for gene in genes if gene in _idx_stGenes]
    for cluster, genes in dt_marker.items()
    if genes
}
singleCellTools.geneEnrichInfo.getAUCellScore(
    ad_stAlign, _dt_marker, 'raw', threads=14, label='scUpRegGenes_AUC'
)
Create regulons from a dataframe of enriched features. Additional columns saved: []
# Plot the AUCell scores on the UMAP and on the tissue section.
_ad = singleCellTools.plotting.obsmToObs(ad_stAlign, 'scUpRegGenes_AUC')
_ls_aucKeys = _ad.uns['plot_obsm']
sc.pl.umap(_ad, color=_ls_aucKeys, cmap='Reds')
sc.pl.spatial(
    _ad,
    color=_ls_aucKeys,
    size=0.12,
    img_key="cross",
    na_in_legend=False,
    cmap='Reds',
)  # alpha=0.75 was tried and left disabled
# plt.legend(ncol=1, loc='upper left', bbox_to_anchor=(0,-0.2), frameon=False)
from jpy_tools.otherTools import getGoDesc

# Build the GO lookup tables: term -> gene, term -> name, term -> category.
df_go = pd.read_table("/data/Zhaijx/liuzj/data/soybase_IPF/soybase_soybean_go_term.tsv")
_ls_goTerm = df_go["goTerm"].unique().tolist()
df_goDesc = getGoDesc(_ls_goTerm)

# Map each queried GO id to the id returned by `getGoDesc` (column "hitGO").
_dt = df_goDesc["hitGO"].to_dict()
_dt_termGeneCols = {"gene": "Gene stable ID", "goTerm": "GO term accession"}
df_termGene = df_go[["goTerm", "gene"]].assign(
    goTerm=lambda df: df["goTerm"].map(_dt)
)
df_termGene = df_termGene.rename(columns=_dt_termGeneCols)

# NOTE(review): only "hitGO" and "hitName" are selected, so the "index" rename key
# never matches and the accession column keeps the name "hitGO". Presumably
# "hitGO" -> "GO term accession" was intended -- confirm against the consumer.
_dt_termNameCols = {"index": "GO term accession", "hitName": "GO term name"}
df_termName = df_goDesc.reset_index()[["hitGO", "hitName"]].rename(
    columns=_dt_termNameCols
)

dt_goToCat = df_go.set_index("goTerm")["cate"].to_dict()
100%|██████████| 5017/5017 [01:16<00:00, 65.89it/s] 2022-11-03 13:50:08.631 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0007126, target : GO:0051321 2022-11-03 13:50:08.633 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0016568, target : GO:0006325 2022-11-03 13:50:08.636 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0006944, target : GO:0061025 2022-11-03 13:50:08.642 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0007067, target : GO:0000278 2022-11-03 13:50:08.643 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0006342, target : GO:0031507 2022-11-03 13:50:08.646 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0070838, target : GO:0030001 2022-11-03 13:50:08.647 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0009814, target : GO:0098542 2022-11-03 13:50:08.649 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0007050, target : GO:0051726 2022-11-03 13:50:08.651 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0009816, target : GO:0042742 2022-11-03 13:50:08.654 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0009817, target : GO:0050832 2022-11-03 13:50:08.656 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0015992, target : GO:1902600 2022-11-03 13:50:08.658 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0042787, target : GO:0006511 2022-11-03 13:50:08.659 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0006333, target : GO:0006325 2022-11-03 13:50:08.661 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0034613, target : GO:0008104 2022-11-03 13:50:08.662 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0080055, target : GO:0015706 2022-11-03 13:50:08.664 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0023014, target : GO:0007165 2022-11-03 13:50:08.668 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0044267, target : GO:0019538 2022-11-03 13:50:08.669 | 
WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0010388, target : GO:0000338 2022-11-03 13:50:08.673 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0032312, target : GO:0043087 2022-11-03 13:50:08.674 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0016246, target : GO:0035194 2022-11-03 13:50:08.676 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0015696, target : GO:0072488 2022-11-03 13:50:08.678 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0016337, target : GO:0098609 2022-11-03 13:50:08.683 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0015991, target : GO:1902600 2022-11-03 13:50:08.684 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0006464, target : GO:0036211 2022-11-03 13:50:08.686 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0006344, target : GO:0070829 2022-11-03 13:50:08.688 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0007243, target : GO:0035556 2022-11-03 13:50:08.690 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0010107, target : GO:1990573 2022-11-03 13:50:08.693 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:2000072, target : GO:1900150 2022-11-03 13:50:08.700 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0006827, target : GO:0034755 2022-11-03 13:50:08.701 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0048554, target : GO:0043085 2022-11-03 13:50:08.703 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0033587, target : GO:0009423 2022-11-03 13:50:08.704 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0072661, target : GO:0072659 2022-11-03 13:50:08.708 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0006348, target : GO:0031509 2022-11-03 13:50:08.712 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0006461, target : GO:0065003 2022-11-03 13:50:08.713 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0032313, target : 
GO:0043087 2022-11-03 13:50:08.716 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0010204, target : GO:0002758 2022-11-03 13:50:08.721 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0010163, target : GO:0071805 2022-11-03 13:50:08.724 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0001789, target : GO:0003376 2022-11-03 13:50:08.726 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0015785, target : GO:0072334 2022-11-03 13:50:08.728 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0032862, target : GO:0090630 2022-11-03 13:50:08.730 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0070919, target : GO:0030422 2022-11-03 13:50:08.733 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0015784, target : GO:1990570 2022-11-03 13:50:08.734 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0009870, target : GO:0002758 2022-11-03 13:50:08.737 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0060145, target : GO:0009616 2022-11-03 13:50:08.739 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0043091, target : GO:1903826 2022-11-03 13:50:08.742 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0032857, target : GO:0090630 2022-11-03 13:50:08.744 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:2000021, target : GO:0050801 2022-11-03 13:50:08.745 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0015688, target : GO:0033214 2022-11-03 13:50:08.746 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0043044, target : GO:0006338 2022-11-03 13:50:08.751 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0035280, target : GO:0070922 2022-11-03 13:50:08.753 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0009623, target : GO:0050832 2022-11-03 13:50:08.754 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0015809, target : GO:1903826 2022-11-03 13:50:08.755 | WARNING | 
jpy_tools.otherTools:getGoDesc:605 - query : GO:0015684, target : GO:0006826 2022-11-03 13:50:08.760 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0000169, target : GO:0000161 2022-11-03 13:50:08.761 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0042493, target : GO:0009410 2022-11-03 13:50:08.763 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0071435, target : GO:0097623 2022-11-03 13:50:08.765 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0043241, target : GO:0032984 2022-11-03 13:50:08.770 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0015238, target : GO:0042910 2022-11-03 13:50:08.772 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0016619, target : GO:0004471 2022-11-03 13:50:08.773 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0008026, target : GO:0004386 2022-11-03 13:50:08.774 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0004091, target : GO:0052689 2022-11-03 13:50:08.777 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0004872, target : GO:0038023 2022-11-03 13:50:08.780 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0016876, target : GO:0004812 2022-11-03 13:50:08.782 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0004004, target : GO:0003724 2022-11-03 13:50:08.783 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0008536, target : GO:0031267 2022-11-03 13:50:08.787 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0008601, target : GO:0019888 2022-11-03 13:50:08.790 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0004012, target : GO:0140326 2022-11-03 13:50:08.790 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0016820, target : GO:0042626 2022-11-03 13:50:08.791 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0008060, target : GO:0005096 2022-11-03 13:50:08.793 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0005089, target : GO:0005085 
2022-11-03 13:50:08.795 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0019201, target : GO:0050145 2022-11-03 13:50:08.795 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0042936, target : GO:0071916 2022-11-03 13:50:08.798 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0005086, target : GO:0005085 2022-11-03 13:50:08.799 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0022891, target : GO:0022857 2022-11-03 13:50:08.800 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0015326, target : GO:0015174 2022-11-03 13:50:08.801 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0042282, target : GO:0004420 2022-11-03 13:50:08.802 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0017137, target : GO:0031267 2022-11-03 13:50:08.805 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0004003, target : GO:0003678 2022-11-03 13:50:08.805 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0005083, target : GO:0030695 2022-11-03 13:50:08.806 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0009540, target : GO:0052662 2022-11-03 13:50:08.808 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0032947, target : GO:0060090 2022-11-03 13:50:08.810 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0051739, target : GO:0008519 2022-11-03 13:50:08.812 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0043764, target : GO:0103118 2022-11-03 13:50:08.813 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0000975, target : GO:0000976 2022-11-03 13:50:08.814 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0015198, target : GO:0035673 2022-11-03 13:50:08.816 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0015181, target : GO:0061459 2022-11-03 13:50:08.817 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0090450, target : GO:1990003 2022-11-03 13:50:08.819 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query 
: GO:0033613, target : GO:0140297 2022-11-03 13:50:08.821 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0005098, target : GO:0005096 2022-11-03 13:50:08.822 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0030675, target : GO:0005096 2022-11-03 13:50:08.824 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0050347, target : GO:0052923 2022-11-03 13:50:08.824 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0071992, target : GO:0044604 2022-11-03 13:50:08.825 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0005487, target : GO:0017056 2022-11-03 13:50:08.826 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0004367, target : GO:0047952 2022-11-03 13:50:08.829 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0044212, target : GO:0000976 2022-11-03 13:50:08.830 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0005100, target : GO:0005096 2022-11-03 13:50:08.832 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0010474, target : GO:0080048 2022-11-03 13:50:08.832 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0010475, target : GO:0080047 2022-11-03 13:50:08.834 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0005097, target : GO:0005096 2022-11-03 13:50:08.835 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0001104, target : GO:0003712 2022-11-03 13:50:08.838 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0001053, target : GO:0016987 2022-11-03 13:50:08.840 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0008891, target : GO:0003973 2022-11-03 13:50:08.842 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0001158, target : GO:0000987 2022-11-03 13:50:08.845 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0017048, target : GO:0031267 2022-11-03 13:50:08.847 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0004647, target : GO:0036424 2022-11-03 13:50:08.850 | WARNING | 
jpy_tools.otherTools:getGoDesc:605 - query : GO:0030385, target : GO:0103012 2022-11-03 13:50:08.852 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0015197, target : GO:1904680 2022-11-03 13:50:08.853 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0045153, target : GO:0008121 2022-11-03 13:50:08.854 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0004008, target : GO:0043682 2022-11-03 13:50:08.856 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0001047, target : GO:0001046 2022-11-03 13:50:08.858 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0070361, target : GO:0001018 2022-11-03 13:50:08.862 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0003826, target : GO:0003863 2022-11-03 13:50:08.862 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0004147, target : GO:0043754 2022-11-03 13:50:08.863 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0030359, target : GO:0019888 2022-11-03 13:50:08.865 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0043140, target : GO:0043138 2022-11-03 13:50:08.867 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0010175, target : GO:0140338 2022-11-03 13:50:08.872 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0080138, target : GO:0046715 2022-11-03 13:50:08.874 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0015088, target : GO:0005375 2022-11-03 13:50:08.879 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0003996, target : GO:0004467 2022-11-03 13:50:08.881 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0052669, target : GO:0052668 2022-11-03 13:50:08.884 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0016021, target : GO:0016020 2022-11-03 13:50:08.889 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0008274, target : GO:0000931 2022-11-03 13:50:08.891 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0043234, target : GO:0032991 
2022-11-03 13:50:08.892 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0000790, target : GO:0000785 2022-11-03 13:50:08.893 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0005719, target : GO:0000791 2022-11-03 13:50:08.895 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0030529, target : GO:1990904 2022-11-03 13:50:08.896 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0005887, target : GO:0005886 2022-11-03 13:50:08.898 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0016023, target : GO:0031410 2022-11-03 13:50:08.901 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0005720, target : GO:0000792 2022-11-03 13:50:08.904 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0000784, target : GO:0000781 2022-11-03 13:50:08.905 | WARNING | jpy_tools.otherTools:getGoDesc:605 - query : GO:0000798, target : GO:0008278
ad_stAlign.obs['n_counts'].mean(), ad_stAlign.obs['n_genes'].mean()
(695.0361, 224.3304665102945)
# Load clusterProfiler through rpy2 and (re)bind the base-R package handle and the R evaluator.
cpro = importr('clusterProfiler')
rBase = importr("base")
R = ro.r
# Convert the two term tables to R objects once; they are passed as TERM2GENE / TERM2NAME
# on every enricher call in enrichmentAnalysisGO.
dfR_termGene = py2r(df_termGene)
dfR_termName = py2r(df_termName)
def enrichmentAnalysisGO(ls_gene, ls_background, qvalueCutoff=0.2, dt_goToCat=None, pvalueCutoff=0.05):
    """Run ``clusterProfiler::enricher`` on a gene list and return the result as a table.

    Parameters
    ----------
    ls_gene
        Genes to test; unpacked into an R vector with ``R.c``.
    ls_background
        Genes passed to ``enricher`` as ``universe``.
    qvalueCutoff, pvalueCutoff
        Forwarded unchanged to ``enricher``.
    dt_goToCat
        Optional mapping applied to the result index to fill a ``Category``
        column (presumably GO ID -> category label; verify against caller).

    Returns
    -------
    The converted result table. An empty table is returned as-is. Otherwise a
    ``-log10Pvalue`` column is added and rows are sorted by ``pvalue``, or by
    ``Category`` then ``pvalue`` when ``dt_goToCat`` is truthy.
    """
    r_genes = R.c(*ls_gene)
    r_universe = R.c(*ls_background)
    enrich_res = cpro.enricher(
        r_genes,
        universe=r_universe,
        TERM2GENE=dfR_termGene,
        TERM2NAME=dfR_termName,
        qvalueCutoff=qvalueCutoff,
        pvalueCutoff=pvalueCutoff,
    )
    df_res = r2py(rBase.as_data_frame(enrich_res))

    # Nothing to annotate or sort when the converted table is empty.
    if df_res.empty:
        return df_res

    df_res["-log10Pvalue"] = np.log10(df_res["pvalue"]) * -1

    ls_sortKeys = ["pvalue"]
    if dt_goToCat:
        df_res["Category"] = df_res.index.map(dt_goToCat)
        ls_sortKeys = ["Category", "pvalue"]
    return df_res.sort_values(ls_sortKeys)
# One colour of the current seaborn palette per GO category label.
# NOTE(review): every key ends with a trailing space - presumably to match the Category values; verify.
dt_goColor = dict(zip(['Biological Process ', 'Cellular Component ', 'Molecular Function '], sns.palettes.color_palette()))
def _fcSplitGoTerm(x, cutoff=40):
from more_itertools import chunked
ls_x = x.split(' ')
parsed = ''
lineChrCounts = 0
for x in ls_x:
x = x.strip()
chrCounts = len(x)
lineChrCounts += chrCounts
if lineChrCounts > cutoff:
parsed = parsed + '\n' + x
lineChrCounts = 0
else:
parsed = parsed + ' ' + x
return parsed
# Enrich every marker list in dt_marker; collect the non-empty tables and
# draw one horizontal bar chart per cluster.
lsDf_go = []
for cluster, ls_gene in dt_marker.items():
    df_go = enrichmentAnalysisGO(
        ls_gene,
        ad.var.index.to_list(),
        dt_goToCat=dt_goToCat,
        qvalueCutoff=0.2,
        pvalueCutoff=0.05,
    )
    if df_go.empty:
        # No enriched term for this cluster: nothing to store or plot.
        continue
    df_go.insert(0, 'Cluster', cluster)
    lsDf_go.append(df_go)

    # Figure height grows with the number of terms (0.2 inch per row).
    fig, ax = plt.subplots(figsize=(4, df_go.shape[0] * 0.2))
    sns.barplot(
        data=df_go,
        x="-log10Pvalue",
        y="Description",
        hue='Category',
        ax=ax,
        dodge=False,
        palette=dt_goColor,
    )
    ax.legend(loc='lower left', bbox_to_anchor=[1, 0])
    ax.set_ylabel('')
    sns.despine(top=True, right=True)
    ax.set_title(cluster)
    plt.show()
R[write to console]: --> No gene can be mapped.... R[write to console]: --> Expected input gene ID: GLYMA_17G001600,GLYMA_20G123100,GLYMA_18G025900,GLYMA_04G060500,GLYMA_19G026800,GLYMA_13G063400 R[write to console]: --> return NULL...
# Cluster 12: run the enrichment and keep only two hand-picked GO IDs.
_ls_go = ['GO:0009098', 'GO:0015144']
cluster = '12'
ls_gene = dt_marker[cluster]
df_go = enrichmentAnalysisGO(ls_gene, ad.var.index.to_list(), dt_goToCat = dt_goToCat, qvalueCutoff=0.2, pvalueCutoff=0.05)
df_go.insert(0, 'Cluster', '12')
df_go = df_go.query("ID in @_ls_go")
# NOTE(review): the collector is reset here, so the cluster-12 table built above is never appended to it.
lsDf_go = []
# Cluster 11: same procedure with three GO IDs; this table is appended to lsDf_go.
_ls_go = ['GO:0016161', 'GO:0010310', 'GO:0009750']
cluster = '11'
ls_gene = dt_marker[cluster]
df_go = enrichmentAnalysisGO(ls_gene, ad.var.index.to_list(), dt_goToCat = dt_goToCat, qvalueCutoff=0.2, pvalueCutoff=0.05)
df_go.insert(0, 'Cluster', '11')
df_go = df_go.query("ID in @_ls_go")
lsDf_go.append(df_go)
# Cluster 11: run the enrichment, keep three hand-picked GO IDs, store the
# table and draw it as a horizontal bar chart.
_ls_go = ['GO:0016161', 'GO:0010310', 'GO:0009750']
cluster = '11'
ls_gene = dt_marker[cluster]
df_go = enrichmentAnalysisGO(ls_gene, ad.var.index.to_list(), dt_goToCat = dt_goToCat, qvalueCutoff=0.2, pvalueCutoff=0.05)
df_go.insert(0, 'Cluster', '11')
df_go = df_go.query("ID in @_ls_go")
lsDf_go.append(df_go)
# Figure height scales with the number of retained terms (0.2 inch per row).
fig, ax = plt.subplots(figsize=(4, df_go.shape[0] * 0.2))
sns.barplot(data=df_go, x="-log10Pvalue", y="Description", hue = 'Category', ax=ax, dodge=False, palette=dt_goColor)
# The legend is deliberately left where seaborn puts it; the explicit placement used
# in the other GO plots (loc='lower left', bbox_to_anchor=[1,0]) was disabled here.
# An unfinished `ax.` statement that made this cell a SyntaxError has been removed.
plt.ylabel('')
sns.despine(top=True, right=True)
plt.title('11')
plt.show()
| Cluster | ID | Description | GeneRatio | BgRatio | pvalue | p.adjust | qvalue | geneID | Count | -log10Pvalue | Category | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| GO:0010310 | 11 | GO:0010310 | regulation of hydrogen peroxide metabolic proc... | 14/218 | 388/35944 | 1.296308e-07 | 1.732731e-05 | 1.514633e-05 | GLYMA_02G086100/GLYMA_06G061900/GLYMA_07G19530... | 14 | 6.887292 | Biological Process |
| GO:0009750 | 11 | GO:0009750 | response to fructose (GO:0009750) | 12/218 | 285/35944 | 2.033955e-07 | 2.039040e-05 | 1.782387e-05 | GLYMA_01G161500/GLYMA_02G145300/GLYMA_06G05010... | 12 | 6.691659 | Biological Process |
| GO:0016161 | 11 | GO:0016161 | beta-amylase activity (GO:0016161) | 6/218 | 17/35944 | 5.437488e-10 | 2.180433e-07 | 1.905983e-07 | GLYMA_01G203400/GLYMA_05G068000/GLYMA_11G03940... | 6 | 9.264602 | Molecular Function |
# Split the '/'-joined geneID field of term GO:0010310 into a list and show
# the df_symbol rows whose index is in that list.
_ls = df_go.loc['GO:0010310', 'geneID'].split('/')
df_symbol.query("index in @_ls")
| Symbol | other_designations | |
|---|---|---|
| geneID | ||
| GLYMA_16G141500 | BZIP117 | bZIP transcription factor bZIP117 |
| GLYMA_06G061900 | WRKY17 | WRKY transcription factor 17 |
| GLYMA_11G121800 | LOC102661758 | nonsymbiotic hemoglobin|non-symbiotic hemoglob... |
| GLYMA_12G150500 | LOC100814871 | stem-specific protein TSJT1 |
| GLYMA_02G086100 | LOC100811587 | RNA-dependent RNA polymerase 1 |
| GLYMA_09G073600 | LOC100806761 | sucrose synthase |
| GLYMA_18G273200 | LOC100787893 | protein DOWNY MILDEW RESISTANCE 6 |
| GLYMA_13G181000 | LOC100786699 | stem-specific protein TSJT1 |
| GLYMA_13G242100 | LOC100816141 | stem-specific protein TSJT1 |
| GLYMA_17G227900 | LOC100806753 | fructan 6-exohydrolase |
| GLYMA_15G071300 | LOC100804761 | uncharacterized protein LOC100804761 |
| GLYMA_18G208800 | LOC100792833 | probable WRKY transcription factor 33 |
| GLYMA_15G072400 | LOC100780697 | uncharacterized protein LOC100780697 |
| GLYMA_07G195300 | LOC100776430 | stem-specific protein TSJT1|uncharacterized pr... |
len(_ls)
14
# GO enrichment and bar chart for cluster 0 (default p/q cut-offs), with one
# term description overwritten by a shorter label before plotting.
lsDf_go = []
for cluster in ['0']:
    ls_gene = dt_marker[cluster]
    df_go = enrichmentAnalysisGO(
        ls_gene,
        ad.var.index.to_list(),
        dt_goToCat=dt_goToCat,
    )
    if df_go.empty:
        continue
    df_go.insert(0, 'Cluster', cluster)
    # Replace the description of GO:0016717 with a hand-written one.
    df_go.loc['GO:0016717', 'Description'] = 'oxidoreductase activity (GO:0016717)'
    lsDf_go.append(df_go)

    fig, ax = plt.subplots(figsize=(4, df_go.shape[0] * 0.2))
    sns.barplot(
        data=df_go,
        x="-log10Pvalue",
        y="Description",
        hue='Category',
        ax=ax,
        dodge=False,
        palette=dt_goColor,
    )
    ax.legend(loc='lower left', bbox_to_anchor=[1, 0])
    ax.set_xlabel("-log$_{10}$Pvalue")
    ax.set_ylabel('')
    sns.despine(top=True, right=True)
    ax.set_title(cluster)
    plt.show()
# GO enrichment and one bar chart each for clusters 7, 11 and 12.
lsDf_go = []
for cluster in ["7", "11", "12"]:
    ls_gene = dt_marker[cluster]
    df_go = enrichmentAnalysisGO(ls_gene, ad.var.index.to_list(), dt_goToCat=dt_goToCat)
    if df_go.empty:
        # Skip clusters without any enriched term.
        continue
    df_go.insert(0, "Cluster", cluster)
    lsDf_go.append(df_go)

    # 0.2 inch of height per enriched term.
    fig, ax = plt.subplots(figsize=(4, df_go.shape[0] * 0.2))
    sns.barplot(data=df_go, x="-log10Pvalue", y="Description", hue="Category", ax=ax, dodge=False, palette=dt_goColor)
    ax.legend(loc="lower left", bbox_to_anchor=[1, 0])
    ax.set_xlabel("-log$_{10}$Pvalue")
    ax.set_ylabel("")
    sns.despine(top=True, right=True)
    ax.set_title(cluster)
    plt.show()
# GO enrichment and bar chart for cluster 11 only; df_go and cluster are left
# bound to this cluster after the loop.
lsDf_go = []
for cluster in ['11']:
    ls_gene = dt_marker[cluster]
    df_go = enrichmentAnalysisGO(
        ls_gene,
        ad.var.index.to_list(),
        dt_goToCat=dt_goToCat,
    )
    if df_go.empty:
        continue
    df_go.insert(0, 'Cluster', cluster)
    lsDf_go.append(df_go)

    fig, ax = plt.subplots(figsize=(4, df_go.shape[0] * 0.2))
    sns.barplot(
        data=df_go,
        x="-log10Pvalue",
        y="Description",
        hue='Category',
        ax=ax,
        dodge=False,
        palette=dt_goColor,
    )
    ax.legend(loc='lower left', bbox_to_anchor=[1, 0])
    ax.set_xlabel("-log$_{10}$Pvalue")
    ax.set_ylabel('')
    sns.despine(top=True, right=True)
    ax.set_title(cluster)
    plt.show()
# Compact, single-colour bar chart of two selected GO terms.
# `df_go` and `cluster` are not set here; they are whatever the previously run cell left behind.
fig, ax = plt.subplots(figsize=(12, 0.5))
sns.barplot(
data=df_go.query("ID in ['GO:0005983', 'GO:0016161']"),
x="-log10Pvalue",
y="Description",
# hue="Category",
ax=ax,
dodge=False,
color="black"
# palette=dt_goColor,
)
# plt.legend(loc="lower left", bbox_to_anchor=[1, 0])
plt.xlabel("-log$_{10}$Pvalue")
plt.ylabel("")
sns.despine(top=True, right=True)
plt.title(cluster)
plt.show()
import diffxpy.api as de
# UMAP overview: cluster numbers drawn on the data, then the "Cell type" colouring
# drawn onto the same axes so that its legend can be placed below the plot.
# fig, ax = plt.subplots(figsize=(6, 3))
# Cluster labels '0'..'14' as strings, minus five excluded ones.
# NOTE(review): `_ls` is not used by the remaining statements of this cell.
_ls = range(15) | F(map, str) | F(filter, lambda x: x not in ['6', '8', '10', '13', '14']) | F(list)
ax = sc.pl.umap(ad, color="Cluster", legend_loc="on data", show=False, na_in_legend=False)
sc.pl.umap(ad, color="Cell type", show=False, ax=ax, title="", na_in_legend=False)
# ax = sc.pl.umap(ad, color="Cluster", legend_loc="on data", show=False)
# sc.pl.umap(ad, color="Cell type", show=False, ax=ax, title="")
plt.legend(loc="upper left", bbox_to_anchor=(-0.35, -0.1), ncol=3, frameon=False)
# plt.title("Single-nucleus transcriptomes")
sns.despine()
plt.show()
# Project helper; from its name and arguments it presumably plots the share of each Sample within each Cluster.
singleCellTools.plotting.plotLabelPercentageInCluster(ad, "Cluster", "Sample")
plt.show()
# For every cluster, list the samples that contribute more than 100 cells to it.
dt_diffxpyUseSampleForEachCluster = {}
ls_allUsedSample = ad.obs[["Cluster", "Sample"]].value_counts().loc[lambda sr:sr > 100].sort_index().index
for cluster, sample in ls_allUsedSample:
    # setdefault creates the per-cluster list on first sight of the cluster.
    dt_diffxpyUseSampleForEachCluster.setdefault(cluster, []).append(sample)
# Within each cluster, test every retained sample against the remaining ones
# with diffxpy and collect one result table per (cluster, sample).
ls_diffxpyResults = []
for cluster, ls_diffxpyUsedSamples in dt_diffxpyUseSampleForEachCluster.items():
    print(cluster, ls_diffxpyUsedSamples)
    # A versus-rest test needs at least two groups.
    if len(ls_diffxpyUsedSamples) <= 1:
        print(f"Skip {cluster}")
        continue

    # `cluster` and `ls_diffxpyUsedSamples` are referenced by name inside the eval string.
    _ad = ad[ad.obs.eval("Cluster == @cluster & Sample in @ls_diffxpyUsedSamples"), :].copy()
    # Project helper; the merge below relies on the Sample_<name>_expressedRatio /
    # Sample_<name>_expressedCount columns being present in _ad.var afterwards.
    singleCellTools.geneEnrichInfo.getGeneMeanAndExpressedRatioGroups(_ad, layer='raw', groupby=['Sample'])
    de_res = de.test.versus_rest(
        _ad.layers['raw'],
        grouping='Sample',
        gene_names=_ad.var.index,
        sample_description=_ad.obs,
        quick_scale=True,
        noise_model='nb',
    )

    for sample in ls_diffxpyUsedSamples:
        # Per-sample expression columns with the "Sample_<name>_" prefix stripped.
        df_sampleExpr = _ad.var[
            [f"Sample_{sample}_expressedRatio", f"Sample_{sample}_expressedCount"]
        ].rename(columns=lambda x: x.replace(f"Sample_{sample}_", ""))
        df_oneDiffxpyResult = de_res.summary_group(sample)
        df_oneDiffxpyResult = df_oneDiffxpyResult.merge(df_sampleExpr, left_on='gene', right_index=True)
        df_oneDiffxpyResult = df_oneDiffxpyResult.assign(sample=sample, cluster=cluster)
        ls_diffxpyResults.append(df_oneDiffxpyResult)
0 ['Large Nodule', 'Small Nodule'] training location model: False training scale model: False iter 0: ll=87618564.355380 iter 1: ll=87618564.355380, converged: 100.00% (loc: 100.00%, scale update: True), in 0.00sec training location model: False training scale model: False iter 0: ll=87557317.361295 iter 1: ll=87557317.361295, converged: 100.00% (loc: 100.00%, scale update: True), in 0.00sec 1 ['Large Nodule', 'Small Nodule'] training location model: False training scale model: False iter 0: ll=75932850.679249 iter 1: ll=75932850.679249, converged: 100.00% (loc: 100.00%, scale update: True), in 0.00sec training location model: False training scale model: False iter 0: ll=75833950.477206 iter 1: ll=75833950.477206, converged: 100.00% (loc: 100.00%, scale update: True), in 0.00sec 2 ['Large Nodule', 'Small Nodule', 'Root'] training location model: False training scale model: False iter 0: ll=67738397.219641 iter 1: ll=67738397.219641, converged: 100.00% (loc: 100.00%, scale update: True), in 0.00sec training location model: False training scale model: False iter 0: ll=69243398.052136 iter 1: ll=69243398.052136, converged: 100.00% (loc: 100.00%, scale update: True), in 0.00sec training location model: False training scale model: False iter 0: ll=67644831.402766 iter 1: ll=67644831.402766, converged: 100.00% (loc: 100.00%, scale update: True), in 0.00sec 3 ['Large Nodule', 'Small Nodule', 'Root'] training location model: False training scale model: False iter 0: ll=46510360.401502 iter 1: ll=46510360.401502, converged: 100.00% (loc: 100.00%, scale update: True), in 0.00sec training location model: False training scale model: False iter 0: ll=43960196.686204 iter 1: ll=43960196.686204, converged: 100.00% (loc: 100.00%, scale update: True), in 0.00sec training location model: False training scale model: False iter 0: ll=44524755.308353 iter 1: ll=44524755.308353, converged: 100.00% (loc: 100.00%, scale update: True), in 0.00sec 4 ['Large Nodule', 'Small Nodule', 'Root'] 
training location model: False training scale model: False iter 0: ll=64306436.931557 iter 1: ll=64306436.931557, converged: 100.00% (loc: 100.00%, scale update: True), in 0.00sec training location model: False training scale model: False iter 0: ll=65475817.224970 iter 1: ll=65475817.224970, converged: 100.00% (loc: 100.00%, scale update: True), in 0.00sec training location model: False training scale model: False iter 0: ll=64214510.324356 iter 1: ll=64214510.324356, converged: 100.00% (loc: 100.00%, scale update: True), in 0.00sec 5 ['Large Nodule', 'Small Nodule', 'Root'] training location model: False training scale model: False iter 0: ll=48332876.439546 iter 1: ll=48332876.439546, converged: 100.00% (loc: 100.00%, scale update: True), in 0.00sec training location model: False training scale model: False iter 0: ll=47924266.050152 iter 1: ll=47924266.050152, converged: 100.00% (loc: 100.00%, scale update: True), in 0.00sec training location model: False training scale model: False iter 0: ll=48056150.823426 iter 1: ll=48056150.823426, converged: 100.00% (loc: 100.00%, scale update: True), in 0.00sec 6 ['Small Nodule', 'Root'] training location model: False training scale model: False iter 0: ll=40029403.028887 iter 1: ll=40029403.028887, converged: 100.00% (loc: 100.00%, scale update: True), in 0.00sec training location model: False training scale model: False iter 0: ll=40650091.067875 iter 1: ll=40650091.067875, converged: 100.00% (loc: 100.00%, scale update: True), in 0.00sec 7 ['Large Nodule', 'Small Nodule'] training location model: False training scale model: False iter 0: ll=82189078.350980 iter 1: ll=82189078.350980, converged: 100.00% (loc: 100.00%, scale update: True), in 0.00sec training location model: False training scale model: False iter 0: ll=82724470.895233 iter 1: ll=82724470.895233, converged: 100.00% (loc: 100.00%, scale update: True), in 0.00sec 8 ['Large Nodule', 'Small Nodule', 'Root'] training location model: False training scale 
model: False iter 0: ll=43242154.905432 iter 1: ll=43242154.905432, converged: 100.00% (loc: 100.00%, scale update: True), in 0.00sec training location model: False training scale model: False iter 0: ll=42769844.703970 iter 1: ll=42769844.703970, converged: 100.00% (loc: 100.00%, scale update: True), in 0.00sec training location model: False training scale model: False iter 0: ll=42549751.207883 iter 1: ll=42549751.207883, converged: 100.00% (loc: 100.00%, scale update: True), in 0.00sec 9 ['Large Nodule', 'Small Nodule'] training location model: False training scale model: False iter 0: ll=55556250.790290 iter 1: ll=55556250.790290, converged: 100.00% (loc: 100.00%, scale update: True), in 0.00sec training location model: False training scale model: False iter 0: ll=55513785.367721 iter 1: ll=55513785.367721, converged: 100.00% (loc: 100.00%, scale update: True), in 0.00sec 10 ['Large Nodule', 'Small Nodule', 'Root'] training location model: False training scale model: False iter 0: ll=39333458.198315 iter 1: ll=39333458.198315, converged: 100.00% (loc: 100.00%, scale update: True), in 0.00sec training location model: False training scale model: False iter 0: ll=39330038.244983 iter 1: ll=39330038.244983, converged: 100.00% (loc: 100.00%, scale update: True), in 0.00sec training location model: False training scale model: False iter 0: ll=39134035.863008 iter 1: ll=39134035.863008, converged: 100.00% (loc: 100.00%, scale update: True), in 0.00sec 11 ['Large Nodule'] Skip 11 12 ['Large Nodule', 'Small Nodule'] training location model: False training scale model: False iter 0: ll=40042353.060870 iter 1: ll=40042353.060870, converged: 100.00% (loc: 100.00%, scale update: True), in 0.00sec training location model: False training scale model: False iter 0: ll=39997707.965499 iter 1: ll=39997707.965499, converged: 100.00% (loc: 100.00%, scale update: True), in 0.00sec 13 ['Large Nodule', 'Root'] training location model: False training scale model: False iter 0: 
ll=34336935.875803 iter 1: ll=34336935.875803, converged: 100.00% (loc: 100.00%, scale update: True), in 0.00sec training location model: False training scale model: False iter 0: ll=34485741.646029 iter 1: ll=34485741.646029, converged: 100.00% (loc: 100.00%, scale update: True), in 0.00sec
# Stack all per-(cluster, sample) result tables and keep rows passing the three thresholds.
df_diffxpyResults = pd.concat(ls_diffxpyResults)
df_diffxpyResults = df_diffxpyResults.reset_index(drop=True)
df_diffxpyResults = df_diffxpyResults.query("qval < 0.01 & log2fc < -4 & expressedCount > 10")
# Display names for the three samples.
dt_renameSample = {
"Root": "Root",
"Large Nodule": "Nodule (21 dpi)",
"Small Nodule": "Nodule (12 dpi)",
}
# Fix the category order of sample and cluster, then rename the samples.
# NOTE(review): reorder_categories requires the listed categories to match the existing ones exactly.
df_diffxpyResults = df_diffxpyResults.assign(
sample=lambda df: df["sample"].astype("category").cat.reorder_categories(['Small Nodule', 'Large Nodule', 'Root']).map(dt_renameSample),
cluster=lambda df: df["cluster"].astype("category").cat.reorder_categories(['0', '1', '2', '3', '4', '5', '6', '7','8', '9', '10', '12', '13']),
)
# Flag genes that occur in exactly one row of the filtered table.
# NOTE(review): a gene hit in two samples of the same cluster is therefore flagged 'No'.
ls_diffxpyClusterUniqueGene = df_diffxpyResults['gene'].value_counts().loc[lambda x:x==1].index.to_list()
df_diffxpyResults = df_diffxpyResults.assign(clusterUnique=lambda df: np.where(df['gene'].isin(ls_diffxpyClusterUniqueGene), 'Yes', 'No'))
df_diffxpyResults['clusterUnique'] = df_diffxpyResults['clusterUnique'].astype('category').cat.set_categories(['Yes', 'No'])
# Fraction of rows per flag value.
df_diffxpyResults.value_counts(['clusterUnique']) / len(df_diffxpyResults)
clusterUnique No 0.553776 Yes 0.446224 dtype: float64
# Bar chart of row counts per cluster: colour encodes the sample, fill encodes the
# clusterUnique flag. Column names are capitalised first, hence 'Clusterunique'.
(
so.Plot(
data=df_diffxpyResults.value_counts(['sample', 'cluster', 'clusterUnique']).rename("Counts").sort_index().reset_index().rename(columns=str.capitalize),
x='Cluster', y='Counts', fill='Clusterunique', color='Sample'
)
.add(so.Bar(width=0.6, edgewidth=1.5), so.Dodge(by=['color']), so.Stack())
.scale(color="deep")
.theme(dt_snsStyle)
.label(fill='Only identified in\n this cluster')
.layout(size=(10,5))
)
# Flip the sign of log2fc (rows were selected with log2fc < -4, so the stored values become positive).
df_diffxpyResults['log2fc'] = df_diffxpyResults['log2fc'] * -1
# df_diffxpyResults.to_excel(f"{dir_result}/cluster_within_deg.xlsx", index=False)
# Combined "cluster: sample" label used as the set name in the upset plot.
df_diffxpyResults = df_diffxpyResults.assign(cluster_sample=lambda df:df['cluster'].astype(str) + ': ' + df['sample'].astype(str))
# Gene x set presence matrix (True where a value exists), collapsed to counts per membership pattern.
df_upset = df_diffxpyResults.pivot_table(values='expressedCount', index='gene', columns='cluster_sample').notna().value_counts()
import upsetplot
upsetplot.plot(df_upset, min_subset_size=10, sort_by='cardinality')
{'matrix': <AxesSubplot:>,
'shading': <AxesSubplot:>,
'totals': <AxesSubplot:>,
'intersections': <AxesSubplot:ylabel='Intersection size'>}
import lmdb
import tqdm
import pickle
# Open the LMDB store (map_size = 2**40 bytes) and start one write transaction;
# it stays open here and is committed by a later cell.
env = lmdb.open(
    "/data/Zhaijx/liuzj/projects/singleCell/soybean/03_web/all_spatial",
    map_size=1099511627776,
)
txn = env.begin(write=True)
# One record per gene: key = gene name, value = pickled, flattened 'normalize_log' values.
for gene in tqdm.tqdm(ad_stAlign.var.index):
    ar_geneExpression = ad_stAlign[:, gene].layers['normalize_log'].A.reshape(-1)
    value = pickle.dumps(ar_geneExpression)
    txn.put(key=gene.encode(), value=value)
100%|██████████| 27744/27744 [05:22<00:00, 86.13it/s]
# Store the two spatial coordinate columns, each multiplied by the 'tissue_cross_scalef'
# scale factor, under the keys 'x' and 'y' of the open write transaction.
value = pickle.dumps(ad_stAlign.obsm['spatial'][:, 0] * ad_stAlign.uns['spatial']['Soybean']['scalefactors']['tissue_cross_scalef'])
txn.put(key='x'.encode(), value=value)
value = pickle.dumps(ad_stAlign.obsm['spatial'][:, 1] * ad_stAlign.uns['spatial']['Soybean']['scalefactors']['tissue_cross_scalef'])
txn.put(key='y'.encode(), value=value)
True
# Store the pickled 'cross' image under the key 'image'.
value = pickle.dumps(ad_stAlign.uns['spatial']['Soybean']['images']['cross'])
txn.put(key='image'.encode(), value=value)
True
# Commit all pending puts, then close the environment.
# NOTE(review): nothing is persisted if an earlier cell fails before this commit runs.
txn.commit()
env.close()
# NOTE(review): `penguins` is not used by the rest of this cell.
penguins = sns.load_dataset('penguins')
# UMAP with cluster numbers on the data and the "Cell type" legend below the axes, saved as a 300 dpi PNG.
fig, ax = plt.subplots(figsize=(8,5))
ax = sc.pl.umap(ad, color="Cluster", legend_loc="on data", show=False, na_in_legend=False, ax=ax)
sc.pl.umap(ad, color="Cell type", show=False, ax=ax, title="", na_in_legend=False)
plt.title('snRNA-seq', fontsize=18)
plt.legend(loc="upper center", bbox_to_anchor=(0.5, -0.05), ncol=3, frameon=False)
# Set every text object on the axes (the on-data labels) to 16 pt.
for text in ax.texts:
text.set_fontsize(16)
plt.savefig("/public/home/liuzj/share/scSoybean/allUmap_bc/leiden.png", dpi=300, bbox_inches='tight')
plt.show()
from joblib import Parallel, delayed
from more_itertools import chunked
# Project helpers: presumably merge the 'raw' layer per Cluster into one observation each,
# then initialise the layers with total=1e6 - confirm against jpy_tools.
ad_merged = singleCellTools.geneEnrichInfo._mergeData(ad, 'Cluster', layer='raw')
singleCellTools.basic.initLayer(ad_merged, total=1e6)
# Fifteen fixed hex colours, used as the bar palette in batchBarplot.
_ls_colors = ['#1f77b4',
'#ff7f0e',
'#279e68',
'#ff9896',
'#aa40fc',
'#8c564b',
'#e377c2',
'#b5bd61',
'#17becf',
'#aec7e8',
'#ffbb78',
'#98df8a',
'#d62728',
'#c5b0d5',
'#c49c94']
def batchBarplot(ad_merged, ls_gene):
    """Write one bar-chart PNG per gene in ``ls_gene``.

    For each gene the values of ``ad_merged[:, gene].X`` are drawn as bars,
    one per observation of ``ad_merged`` (x = ``ad_merged.obs.index``),
    coloured with the module-level ``_ls_colors`` palette. Each figure is
    saved as ``<gene>.bar.png`` (100 dpi) in the hard-coded output directory
    and closed before the next gene is drawn. Returns ``None``.
    """
    ix_observation = ad_merged.obs.index
    for gene in ls_gene:
        ar_expression = ad_merged[:, gene].X.reshape(-1)
        # Drawn on the current axes, which seaborn returns.
        ax = sns.barplot(x=ix_observation, y=ar_expression, palette=_ls_colors)
        ax.set_xlabel('Cluster')
        ax.set_ylabel('Expression\n(Log CPM)')
        ax.set_title(gene)
        plt.savefig(f"/public/home/liuzj/share/scSoybean/allBar/{gene}.bar.png", dpi=100)
        # Close the figure so bars from consecutive genes never pile up on one axes.
        plt.close()
# Split the gene list into chunks of 1000 and render each chunk in a joblib worker (12 jobs).
ls_chunkedGene = chunked(ad.var.index.to_list(), 1000)
Parallel(12)(delayed(batchBarplot)(ad_merged, ls_gene) for ls_gene in ls_chunkedGene)
[None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None]
# Input tables: the Lotus UC/IC DEG list, the 1:1 soybean-Lotus orthologue table and the full orthologue table.
path_ljUcIcDEG = '/data/Zhaijx/liuzj/projects/singleCell/soybean/01_data/lj_pbj/lj_uc_ic.tsv'
path_ljGmOrth = '/data/Zhaijx/liuzj/data/ortholog/plant/parsed/1v1_soybean_maize_lotus/soybase_2__v__lotus.1v1.tsv'
path_allGmOrth = '/data/Zhaijx/liuzj/data/ortholog/plant/results_soybean_maize_lotus/Results_Mar02/Orthologues/Orthologues_soybase_2/soybase_2__v__lotus.tsv'
df_allGmOrth = pd.read_table(path_allGmOrth)
# One row per Lotus entry: split the comma-separated 'lotus' field and explode it.
df_allGmOrth["lotus"] = df_allGmOrth["lotus"].str.split(',')
df_allGmOrth = df_allGmOrth.explode("lotus")
# Keep the part after the first '|' and drop everything from the first '.' onward.
df_allGmOrth['lotus'] = df_allGmOrth['lotus'].map(lambda x: x.split('|')[1].split('.')[0])
def _fc(df):
    """Return the sorted, de-duplicated soybean gene IDs found in ``df['soybase_2']``.

    The column values are joined and re-split on commas; each entry is
    stripped, cut after the ``glyma.Wm82.gnm2.ann1.`` prefix, has ``Glyma.``
    replaced by ``GLYMA_`` and is truncated at the first remaining dot.
    """
    ls_rawId = ','.join(df['soybase_2'].to_list()).split(',')
    set_geneId = {
        rawId.strip().split('glyma.Wm82.gnm2.ann1.')[1].replace('Glyma.', 'GLYMA_').split('.')[0]
        for rawId in ls_rawId
    }
    return sorted(set_geneId)
# Lotus gene -> list of soybean gene IDs, built from the full orthologue table.
dt_allLj2Gm = df_allGmOrth.groupby("lotus").apply(_fc).to_dict()
df_ljDeg = pd.read_table(path_ljUcIcDEG)
df_ljGmOrth = pd.read_table(path_ljGmOrth)
# Lotus gene -> soybean gene from the 1:1 orthologue table.
dt_ljGmOrth = df_ljGmOrth.set_index('lotus')['soybase_2'].to_dict()
# Subset of cells belonging to clusters 0, 7, 11 and 12.
ad_ifz = ad[ad.obs.eval("Cluster in ['0', '7', '11', '12']")].copy()
# Derive the gene ID from the transcript ID (text before the first '.'), map it to its
# 1:1 soybean orthologue and drop rows without one.
df_ljDeg = (
df_ljDeg.assign(
gene=lambda df: df["Transcript ID \n(Lj 3.0)"].str.split(".").str[0]
)
.assign(gmGene=lambda df: df["gene"].map(dt_ljGmOrth))
.dropna(subset=["gmGene"])
)
# Newlines in column names become '_' so the names can be used inside query strings.
df_ljDeg = df_ljDeg.rename(columns = lambda x:x.replace('\n', '_'))
df_ljDeg['-log10FDR'] = - np.log10(df_ljDeg['FDR'])
# Keep rows with -log10FDR > 4 and |log2FC| > 10.
df_ljDeg = df_ljDeg.query("`-log10FDR` > 4 & (`Log2FC_(IC vs UC)` < -10 | `Log2FC_(IC vs UC)` > 10)")
# Positive log2FC (IC vs UC) is labelled "IC", everything else "UC".
df_ljDeg = df_ljDeg.pipe(
lambda df: df.assign(category=np.where(df["Log2FC_(IC vs UC)"] > 0, "IC", "UC"))
)
# Keep a handle on the 1:1-orthologue DEG table; it is reused when the all-paralog table is built.
df_ljDeg_1 = df_ljDeg
# Soybean genes per category, restricted to genes present in ad.var, ordered UC then IC.
dt_ljDeg = df_ljDeg.query('gmGene in @ad.var.index').groupby('category')['gmGene'].agg(list).to_dict()
dt_ljDeg = {x: dt_ljDeg[x] for x in ["UC", "IC"]}
# Heatmap of the UC / IC gene groups across the clusters of ad_ifz,
# using the 'normalize_log' layer scaled per gene (standard_scale="var").
sc.pl.heatmap(
ad_ifz,
dt_ljDeg,
"Cluster",
cmap="Reds",
layer="normalize_log",
standard_scale="var",
figsize=(6, 6),
)
WARNING: Gene labels are not shown when more than 50 genes are visualized. To show gene labels set `show_gene_labels=True`
# Project helper that scores the two gene sets on the 'raw' layer.
# NOTE(review): the result is later read from obsm key "AUCell" - presumably the helper's default label; confirm.
singleCellTools.geneEnrichInfo.getAUCellScore_r(
ad_ifz,
dt_ljDeg,
layer="raw",
)
R[write to console]: Quantiles for the number of genes detected by cell: (Non-detected genes are shuffled at the end of the ranking. Keep it in mind when choosing the threshold for calculating the AUC).
min 1% 5% 10% 50% 100% 460.00 657.07 742.00 799.00 1246.00 3613.00
from matplotlib import ticker
# Side-by-side UMAPs of the two "AUCell" scores, one panel per gene set.
ls_title = ["Up-regulated genes\nin uninfected cell", "Up-regulated genes\nin infected cell"]
with plt.rc_context({"figure.figsize": (6, 3)}):
    # Copy the obsm entry into obs columns; the new column names are read from uns["plot_obsm"].
    _ad = singleCellTools.plotting.obsmToObs(ad_ifz, "AUCell")
    fig, axs = plt.subplots(1, 2)
    axs = axs.reshape(-1)
    for label, title, ax in zip(_ad.uns["plot_obsm"], ls_title, axs):
        # Only a label named "Cortex" gets a lower colour limit.
        sc.pl.umap(
            _ad,
            color=label,
            title=title,
            cmap="Reds",
            show=False,
            ax=ax,
            **({"vmin": 0.15} if label == "Cortex" else {}),
        )
        # Restrict the x range of this panel.
        plt.sca(ax)
        plt.xlim(3, 13)
    plt.tight_layout()
# Reload the raw DEG table and the 1:1 orthologue mapping.
df_ljDeg = pd.read_table(path_ljUcIcDEG)
df_ljGmOrth = pd.read_table(path_ljGmOrth)
dt_ljGmOrth = df_ljGmOrth.set_index('lotus')['soybase_2'].to_dict()
# Same derivation as for the 1:1 table, but gmGene now holds the list of all soybean orthologues.
df_ljDegAll = (
df_ljDeg.assign(
gene=lambda df: df["Transcript ID \n(Lj 3.0)"].str.split(".").str[0]
)
.assign(gmGene=lambda df: df["gene"].map(dt_allLj2Gm))
.dropna(subset=["gmGene"])
)
df_ljDegAll = df_ljDegAll.rename(columns = lambda x:x.replace('\n', '_'))
df_ljDegAll['-log10FDR'] = - np.log10(df_ljDegAll['FDR'])
# Same thresholds as before: -log10FDR > 4 and |log2FC| > 10.
df_ljDegAll = df_ljDegAll.query("`-log10FDR` > 4 & (`Log2FC_(IC vs UC)` < -10 | `Log2FC_(IC vs UC)` > 10)")
df_ljDegAll = df_ljDegAll.pipe(
lambda df: df.assign(category=np.where(df["Log2FC_(IC vs UC)"] > 0, "IC", "UC"))
)
# Lotus genes already covered by the 1:1 table are taken from there; for the rest,
# explode the orthologue lists to one soybean gene per row, then stack both tables.
_ls = df_ljDeg_1['gene']
df_ljDegAll = df_ljDegAll.query("`gene` not in @_ls", engine='python').explode("gmGene")
df_ljDegAll = pd.concat([df_ljDeg_1, df_ljDegAll])
df_ljDegAll = df_ljDegAll.sort_values(['category', 'Transcript ID _(Lj 3.0)'])
df_ljDegAll.to_excel(f"{dir_result}/ljDegAll.xlsx")
# Unique soybean genes per category, restricted to genes present in ad.var, ordered UC then IC.
dt_ljDegAll = df_ljDegAll.groupby('category')['gmGene'].agg(lambda x: list(set(x))).to_dict()
dt_ljDegAll = {x: [z for z in y if z in ad.var.index] >> F(set) >> F(list) for x, y in dt_ljDegAll.items()}
dt_ljDegAll = {x: dt_ljDegAll[x] for x in ["UC", "IC"]}
# Heatmap of the all-paralog UC / IC gene groups across the clusters of ad_ifz,
# using the 'normalize_log' layer scaled per gene (standard_scale="var").
sc.pl.heatmap(
ad_ifz,
dt_ljDegAll,
"Cluster",
cmap="Reds",
layer="normalize_log",
standard_scale="var",
figsize=(6, 6),
)
WARNING: Gene labels are not shown when more than 50 genes are visualized. To show gene labels set `show_gene_labels=True`
# Project helper that scores the all-paralog gene sets on the 'raw' layer;
# the result is read back below under the label 'all_ljDeg'.
singleCellTools.geneEnrichInfo.getAUCellScore_r(
ad_ifz,
dt_ljDegAll,
layer="raw",
label='all_ljDeg'
)
R[write to console]: Quantiles for the number of genes detected by cell: (Non-detected genes are shuffled at the end of the ranking. Keep it in mind when choosing the threshold for calculating the AUC).
min 1% 5% 10% 50% 100% 460.00 657.07 742.00 799.00 1246.00 3613.00
from matplotlib import ticker
# Panel titles, in the same order as the entries of _ad.uns["plot_obsm"].
ls_title = [
    "Up-regulated genes\nin uninfected cell\n(All paralogs)",
    "Up-regulated genes\nin infected cell\n(All paralogs)",
]
with plt.rc_context({"figure.figsize": (6, 3)}):
    _ad = singleCellTools.plotting.obsmToObs(ad_ifz, "all_ljDeg")
    fig, axs = plt.subplots(1, 2)
    axs = axs.ravel()
    for label, title, ax in zip(_ad.uns["plot_obsm"], ls_title, axs):
        # Only a panel labelled "Cortex" gets an explicit lower colour bound.
        sc.pl.umap(
            _ad,
            color=label,
            title=title,
            cmap="Reds",
            show=False,
            ax=ax,
            **({"vmin": 0.15} if label == "Cortex" else {}),
        )
        # Restrict every panel to the same x range.
        plt.sca(ax)
        plt.xlim(3, 13)
    plt.tight_layout()
# Gene-level expression table: transcripts are collapsed onto their gene ID
# (text before the first ".") by summing.
df_ljGem = (
    pd.read_excel('/data/Zhaijx/liuzj/projects/singleCell/soybean/01_data/lj_pbj/pbi13778-sup-0002-tables1 (1).xlsx')
    .assign(gene=lambda df: df["Transcript"].str.split(".").str[0])
    .drop(columns="Transcript")
    .groupby("gene")
    .agg("sum")
)
# Keep only genes that are keys of dt_ljGmOrth and relabel them with the mapped IDs.
df_ljGem = df_ljGem[df_ljGem.index.isin(dt_ljGmOrth)]
df_ljGem.index = df_ljGem.index.map(dt_ljGmOrth)
# Samples become observations, genes become variables.
ad_lj = sc.AnnData(df_ljGem.T)
ad_lj.var_names_make_unique()
ad_lj.layers["rpkm"] = ad_lj.X.copy()
# One label per sample, in the column order of the spreadsheet.
ad_lj.obs["category"] = ["IC"] * 2 + ["UC"] * 4
# Marker table with a "Cluster" column: the text before the first ":" of
# "Cell type", stored as a categorical with the fixed categories "0" .. "14".
df_scMarker = ad.uns["Cell type_cellexES"].assign(
    Cluster=lambda df: df["Cell type"]
    .str.split(":")
    .str[0]
    .astype("category")
    .cat.set_categories([str(i) for i in range(15)])
)
# Export the filtered markers, annotated with df_symbol and ordered by cell
# type, then by decreasing enrichScore.
(
    df_scMarker.query(
        "enrichScore > 0.75 & expressed_ratio > 0.1 & expressed_ratio / expressed_ratio_others > 2"
    )
    .merge(df_symbol, how="left", left_on="gene", right_index=True)
    .sort_values(by=["Cell type", "enrichScore"], ascending=[True, False])
    .to_excel(f"{dir_result}/cluster_marker.xlsx")
)
# Same filter, additionally restricted to genes present in ad_lj, collected
# as one gene list per cluster.
dt_scMarker = (
    df_scMarker.query(
        "enrichScore > 0.75 & expressed_ratio > 0.1 & expressed_ratio / expressed_ratio_others > 2 & gene in @ad_lj.var.index"
    )
    .groupby("Cluster")["gene"]
    .agg(list)
    .to_dict()
)
# Heatmap of the markers of clusters 0, 7, 11 and 12 in ad_lj, samples
# grouped by "category"; each gene is scaled separately.
axs = sc.pl.heatmap(
    ad_lj,
    var_names={x: dt_scMarker[x] for x in ["0", "7", "11", "12"]},
    groupby="category",
    standard_scale="var",
    cmap="Reds",
    figsize=(8, 4),
    show=False,
)
# Label the group-annotation axis.
plt.sca(axs["groupby_ax"])
plt.ylabel("Category")
plt.show()
WARNING: Gene labels are not shown when more than 50 genes are visualized. To show gene labels set `show_gene_labels=True`
# (gene ID, display name) pairs, transposed into the two parallel lists.
ls_gene, ls_name = map(
    list,
    zip(
        ("GLYMA_10G199100", "LBA"),
        ("GLYMA_10G199000", "LBC1"),
        ("GLYMA_20G191200", "LBC2"),
        ("GLYMA_10G198800", "LBC3"),
        ("GLYMA_08G012800", "SYMREM1.1"),
        ("GLYMA_05G205900", "SYMREM1.2"),
        ("GLYMA_06G301500", "BMY1-1"),
        ("GLYMA_15G098100", "BMY1-2"),
        ("GLYMA_08G025500", "BMY1-3"),
        ("GLYMA_17G150100", "BMY2"),
        ("GLYMA_05G068000", "BMY3-1"),
        ("GLYMA_01G203400", "BMY3-2"),
        ("GLYMA_11G039400", "BMY3-3"),
        ("GLYMA_13G215000", "BMY9"),
    ),
)
axs = sc.pl.dotplot(
    ad,
    var_names=ls_gene,
    groupby="Cluster",
    layer="normalize_log",
    swap_axes=True,
    cmap="Reds",
    figsize=(6, 4),
    dot_min=0.15,
    dot_max=0.9,
    vmax=1.75,
    show=False,
)
plt.sca(axs["mainplot_ax"])
plt.xticks(rotation=0)
# Replace the gene IDs on the y axis by the italic display names (tick at row centre).
plt.yticks([i + 0.5 for i, _ in enumerate(ls_name)], ls_name, style="italic")
plt.show()
{"SYMREM1.1": "GLYMA_08G012800", "N56": "GLYMA_13G024700", "ENOD55": "GLYMA_02G204500"}
{'SYMREM1.1': 'GLYMA_08G012800',
'N56': 'GLYMA_13G024700',
'ENOD55': 'GLYMA_02G204500'}
# (gene ID, display name) pairs, transposed into the two parallel lists.
ls_gene, ls_name = map(
    list,
    zip(
        ("GLYMA_10G199100", "LBA"),
        ("GLYMA_10G199000", "LBC1"),
        ("GLYMA_20G191200", "LBC2"),
        ("GLYMA_10G198800", "LBC3"),
        ("GLYMA_08G012800", "SYMREM1.1"),
        ("GLYMA_13G024700", "N56"),
        ("GLYMA_02G204500", "ENOD55"),
    ),
)
axs = sc.pl.dotplot(
    ad,
    var_names=ls_gene,
    groupby="Cluster",
    layer="normalize_log",
    swap_axes=True,
    cmap="Reds",
    figsize=(10, 4),
    show=False,
)
plt.sca(axs["mainplot_ax"])
plt.xticks(rotation=0)
# Replace the gene IDs on the y axis by the italic display names (tick at row centre).
plt.yticks([i + 0.5 for i, _ in enumerate(ls_name)], ls_name, style="italic")
plt.show()
# Full BMY / UPS2 / UR2 / ASP5 selection as (gene ID, display name) pairs,
# transposed into the two parallel lists.
# NOTE(review): both lists are reassigned by the statements that directly
# follow, before any use that is visible here.
ls_gene, ls_name = map(
    list,
    zip(
        ("GLYMA_06G301500", "BMY1-1"),
        ("GLYMA_15G098100", "BMY1-2"),
        ("GLYMA_08G025500", "BMY1-3"),
        ("GLYMA_17G150100", "BMY2"),
        ("GLYMA_05G068000", "BMY3-1"),
        ("GLYMA_01G203400", "BMY3-2"),
        ("GLYMA_11G039400", "BMY3-3"),
        ("GLYMA_13G215000", "BMY9"),
        ("GLYMA_01G058500", "UPS2-1"),
        ("GLYMA_02G116300", "UPS2-2"),
        ("GLYMA_02G116400", "UPS2-3"),
        ("GLYMA_20G072400", "UR2"),
        ("GLYMA_14G111800", "ASP5-1"),
        ("GLYMA_17G216000", "ASP5-2"),
    ),
)
# Reduced selection as (gene ID, display name) pairs, transposed into the
# two parallel lists.
ls_gene, ls_name = map(
    list,
    zip(
        ("GLYMA_15G098100", "BMY1-2"),
        ("GLYMA_17G150100", "BMY2"),
        ("GLYMA_05G068000", "BMY3-1"),
        ("GLYMA_01G203400", "BMY3-2"),
        ("GLYMA_11G039400", "BMY3-3"),
        ("GLYMA_13G215000", "BMY9"),
        ("GLYMA_01G058500", "UPS2-1"),
        ("GLYMA_02G116300", "UPS2-2"),
        ("GLYMA_20G072400", "UR2"),
        ("GLYMA_14G111800", "ASP5"),
        ("GLYMA_17G216000", "ASP5"),
    ),
)
# Final tick label: gene ID on the first line, display name in brackets below.
ls_name = [f"{gene}\n({name})" for gene, name in zip(ls_gene, ls_name)]
def _dotplotPanel(idx, n_rows, show_clusters):
    """Draw one dot-plot panel for ``ls_gene[idx]`` and return scanpy's axes dict.

    ``n_rows`` scales the figure height (``n_rows / 11 * 5`` at a fixed width
    of 5).  With ``show_clusters`` the x tick labels are kept and rotated;
    otherwise the x ticks are removed.
    """
    genes, labels = ls_gene[idx], ls_name[idx]
    panel_axs = sc.pl.dotplot(
        ad,
        genes,
        "Cluster",
        layer="normalize_log",
        cmap="Reds",
        swap_axes=True,
        show=False,
        figsize=(5, n_rows / 11 * 5),
        dot_min=0.2,
        dot_max=0.8,
        standard_scale="var",
    )
    plt.sca(panel_axs["mainplot_ax"])
    # One italic label per gene row, centred on the row.
    plt.yticks([i + 0.5 for i in range(len(labels))], labels, style="italic", size=8)
    if show_clusters:
        plt.xticks(rotation=-30, ha="center", size=8)
    else:
        plt.xticks([])
        plt.tick_params(bottom=False)
    plt.show()
    return panel_axs


# Three stacked panels: genes 0-5, genes 6-7 and the remaining genes.
axs = _dotplotPanel(slice(None, 6), 5, True)
axs = _dotplotPanel(slice(6, 8), 2, False)
axs = _dotplotPanel(slice(8, None), 3, False)
# All selected genes in a single dot plot, scaled per gene.
axs = sc.pl.dotplot(
    ad,
    var_names=ls_gene,
    groupby="Cluster",
    layer="normalize_log",
    standard_scale="var",
    swap_axes=True,
    cmap="Reds",
    figsize=(5, 5),
    dot_min=0.2,
    dot_max=0.8,
    show=False,
)
plt.sca(axs["mainplot_ax"])
plt.xticks(rotation=-30, ha="center", size=8)
# One italic label per gene row, centred on the row.
plt.yticks([i + 0.5 for i, _ in enumerate(ls_name)], ls_name, style="italic", size=8)
plt.show()
# aspartate aminotransferase
# Convert the "Glyma.01g131100"-style IDs to the "GLYMA_01G131100" form.
_ls = [
    x.replace("Glyma.", "GLYMA_").upper()
    for x in """Glyma.01g131100
Glyma.04g080700
Glyma.05g181000
Glyma.06g082400
Glyma.06g275700
Glyma.08g138800
Glyma.11g238200
Glyma.11g238300
Glyma.14g111800
Glyma.17g216000""".split()
]
# Tick labels start as an independent copy of the gene IDs.
_ls_name = list(_ls)
# _ls_name[-2] = "GLYMA_14G111800\n(ASP5-1)"
# _ls_name[-1] = "GLYMA_17G216000\n(ASP5-2)"
axs = sc.pl.dotplot(
    ad,
    var_names=_ls,
    groupby="Cluster",
    layer="normalize_log",
    swap_axes=True,
    cmap="Reds",
    figsize=(7, 5),
    vmax=1.6,
    show=False,
)
plt.sca(axs["mainplot_ax"])
plt.yticks([i + 0.5 for i, _ in enumerate(_ls)], _ls_name, style="italic", rotation=0, va='top')
plt.xticks(rotation=0)
plt.show()
# One UMAP panel per gene in ls_gene (4 per row), titled with ls_name.
axs = sc.pl.umap(
    ad,
    color=ls_gene,
    title=ls_name,
    layer="normalize_log",
    cmap="Reds",
    size=10,
    ncols=4,
    show=False,
)
# Re-apply every panel title in italics.
for ax in axs:
    plt.sca(ax)
    text = ax.get_title()
    plt.title(text, fontdict=dict(style="italic"))
# (gene ID, display name) pairs, transposed into the two parallel lists.
ls_gene, ls_name = map(
    list,
    zip(
        ("GLYMA_08G012800", "SYMREM1.1"),
        ("GLYMA_05G205900", "SYMREM1.2"),
    ),
)
axs = sc.pl.umap(
    ad,
    color=ls_gene,
    title=ls_name,
    layer="normalize_log",
    cmap="Reds",
    size=10,
    show=False,
)
# Re-apply every panel title in italics.
for ax in axs:
    plt.sca(ax)
    text = ax.get_title()
    plt.title(text, fontdict=dict(style="italic"))
# "Sample_new" composition of clusters 0, 7 and 11, leaving out the cells
# whose Sample is 'Root'; the legend is placed below the axes in two columns.
with plt.rc_context({"figure.figsize": (3, 4)}):
    _ls = ["0", "7", "11"]
    singleCellTools.plotting.plotLabelPercentageInCluster(
        ad[ad.obs.eval("Cluster in @_ls & Sample != 'Root'")],
        "Cluster",
        "Sample_new",
        dt_kwargsForLegend=dict(
            bbox_to_anchor=[0.5, -0.3],
            loc="lower center",
            ncol=2,
        ),
    )
# (gene ID, display name) pairs, transposed into the two parallel lists.
ls_gene, ls_name = map(
    list,
    zip(
        ("GLYMA_06G301500", "BMY1-1"),
        ("GLYMA_15G098100", "BMY1-2"),
        ("GLYMA_08G025500", "BMY1-3"),
        ("GLYMA_17G150100", "BMY2"),
        ("GLYMA_05G068000", "BMY3-1"),
        ("GLYMA_01G203400", "BMY3-2"),
        ("GLYMA_11G039400", "BMY3-3"),
        ("GLYMA_13G215000", "BMY9"),
    ),
)
axs = sc.pl.umap(
    ad,
    color=ls_gene,
    title=ls_name,
    layer="normalize_log",
    cmap="Reds",
    size=10,
    ncols=2,
    show=False,
)
# Re-apply every panel title in italics.
for ax in axs:
    plt.sca(ax)
    text = ax.get_title()
    plt.title(text, fontdict=dict(style="italic"))
import scvelo as scv
import cellrank as cr
# Cells of clusters 0, 7 and 11 from the two nodule samples.
_ls = ["0", "7", "11"]
# Subsetting an AnnData returns a view of `ad`.  Take an explicit copy before
# modifying it: the ImplicitModificationWarning recorded for filter_genes
# shows the object was still a view after the `.X` assignment, so that
# assignment was made through a view of `ad` rather than on an independent
# object.
ad_ciz = ad[
    ad.obs.eval("Sample in ['Large Nodule', 'Small Nodule'] & Cluster in @_ls")
].copy()
# Work on the raw counts and drop genes detected in fewer than 10 cells.
ad_ciz.X = ad_ciz.layers['raw'].copy()
sc.pp.filter_genes(ad_ciz, min_cells=10)
/public/home/liuzj/softwares/anaconda3/envs/sc_py/lib/python3.8/site-packages/scanpy/preprocessing/_simple.py:251: ImplicitModificationWarning: Trying to modify attribute `.var` of view, initializing view as actual.
# Select 1000 highly variable genes, then normalise per cell and log-transform X.
sc.pp.highly_variable_genes(ad_ciz, flavor='seurat_v3', n_top_genes=1000)
scv.pp.normalize_per_cell(ad_ciz)
sc.pp.log1p(ad_ciz)
# Both layers are set to the very same matrix object as X (no copy).
# NOTE(review): presumably a placeholder so that scv.pp.moments, which works
# on 'spliced'/'unspliced', can be run on plain expression data - confirm.
ad_ciz.layers["spliced"] = ad_ciz.X
ad_ciz.layers["unspliced"] = ad_ciz.X
# Neighbour graph (30 PCs, 30 neighbours) and the 'Ms' / 'Mu' moment layers.
scv.pp.moments(ad_ciz, n_pcs=30, n_neighbors=30)
# scv.pp.moments(ad_ciz, n_neighbors=30, use_rep='X_scvi')
Normalized count data: X.
computing neighbors
finished (0:00:02) --> added
'distances' and 'connectivities', weighted adjacency matrices (adata.obsp)
computing moments based on connectivities
finished (0:00:15) --> added
'Ms' and 'Mu', moments of un/spliced abundances (adata.layers)
ad_ciz
AnnData object with n_obs × n_vars = 6838 × 31785
obs: 'batch', 'n_genes', 'n_counts', 'percent_ct', 'leiden_0.0', 'leiden_0.1', 'leiden_0.2', 'leiden_0.3', 'leiden_0.4', 'leiden_0.5', 'leiden_0.6', 'leiden_0.7', 'leiden_0.8', 'leiden_0.9', 'leiden_1.0', 'leiden_1.1', 'leiden_1.2', 'leiden_1.3', 'leiden_1.4', 'leiden_1.5', 'leiden_1.6', 'leiden_1.7', 'leiden_1.8', 'leiden_1.9', 'leiden_2.0', 'leiden', 'UMI counts', 'Gene counts', 'Sample', 'leiden_R', 'Cluster', 'Cell type', '__group', 'Sample_two', 'Partition', 'UMI counts log10', 'Sample_new', 'wgcna_cluster', 'cluster_mergeUC', 'Sample_time', 'vb_ct'
var: 'n_cells', 'highly_variable', 'highly_variable_rank', 'means', 'variances', 'variances_norm', 'highly_variable_nbatches', 'mean', 'std', 'SNF', 'means_ForPickMock', 'Cluster_2_mean', 'Cluster_2_expressedRatio', 'Cluster_12_mean', 'Cluster_12_expressedRatio', 'Cluster_9_mean', 'Cluster_9_expressedRatio', 'Cluster_4_mean', 'Cluster_4_expressedRatio', 'Cluster_0_mean', 'Cluster_0_expressedRatio', 'Cluster_11_mean', 'Cluster_11_expressedRatio', 'Cluster_1_mean', 'Cluster_1_expressedRatio', 'Cluster_8_mean', 'Cluster_8_expressedRatio', 'Cluster_7_mean', 'Cluster_7_expressedRatio', 'Cluster_5_mean', 'Cluster_5_expressedRatio', 'Cluster_6_mean', 'Cluster_6_expressedRatio', 'Cluster_10_mean', 'Cluster_10_expressedRatio', 'Cluster_3_mean', 'Cluster_3_expressedRatio', 'Cluster_13_mean', 'Cluster_13_expressedRatio', 'Cluster_14_mean', 'Cluster_14_expressedRatio', 'Sample_Large Nodule_mean', 'Sample_Large Nodule_expressedRatio', 'Sample_Small Nodule_mean', 'Sample_Small Nodule_expressedRatio', 'Sample_Root_mean', 'Sample_Root_expressedRatio', 'gene_count_corr'
uns: 'Cell type_cellexES', 'Cell type_cellexES_batch', 'Cell type_colors', 'Cluster_colors', 'Sample_colors', 'Sample_new_colors', 'batch_colors', 'cluster_mergeUC_cellexES', 'cosg_leiden_R', 'hvg', 'jaRelated_marker', 'leiden', 'leiden_R_cellexES', 'leiden_colors', 'neighbors', 'rank_genes_groups', 'smDetected_auc', 'sm_auc', 'sn_wgcna_module', 'sn_wgcna_module_0605_triku10000', 'sn_wgcna_module_0605_triku10000_filtered', 'umap', 'vb_ct_cellexES', 'Sample_time_colors', 'log1p', 'pca'
obsm: 'X_scvi', 'X_scvi_withBatchEffect', 'X_umap', 'jaRelated_marker', 'scDblFinder', 'smDetected_auc', 'sm_auc', 'sn_wgcna_module', 'sn_wgcna_module_0605_triku10000', 'sn_wgcna_module_0605_triku10000_filtered', 'seurat_integrated_data', 'X_pca_seurat', 'X_pca'
varm: 'Cell type_cellexES', 'cluster_mergeUC_cellexES', 'leiden_R_cellexES', 'vb_ct_cellexES', 'PCs'
layers: 'empty', 'normalize_log', 'normalize_log_scale', 'raw', 'spliced', 'unspliced', 'Ms', 'Mu'
obsp: 'connectivities', 'distances'
from cellrank.tl.kernels import CytoTRACEKernel
# CytoTRACE-based CellRank kernel on ad_ciz: build the transition matrix and
# project it onto the UMAP embedding.
# NOTE(review): the stream plot further down reads the projection through
# vkey="T_fwd" - confirm that this is the key compute_projection writes.
ctk = CytoTRACEKernel(ad_ciz)
ctk.compute_transition_matrix(threshold_scheme="soft", nu=0.5)
ctk.compute_projection(basis="umap")
0%| | 0/6838 [00:00<?, ?cell/s]
# Reuse the "Cluster" colours of the full object on the subset.
_dt = singleCellTools.basic.getadataColor(ad, "Cluster")
ad_ciz = singleCellTools.basic.setadataColor(ad_ciz, "Cluster", _dt)
with plt.rc_context({"figure.figsize": (3, 4)}):
    # Stream plot of the vectors stored under vkey "T_fwd" on the UMAP.
    ax = scv.pl.velocity_embedding_stream(
        ad_ciz,
        basis="umap",
        vkey="T_fwd",
        color="Cluster",
        title="Cluster",
        legend_loc="right",
        legend_fontsize=16,
        fontsize=18,
        smooth=0.5,
        min_mass=3,
        show=False,
    )
    plt.sca(ax)
    plt.xlim(3, 13)
    # for text in ax.texts:
    # text.set_color('black')
    plt.show()
from cellrank.tl.estimators import GPCCA
# GPCCA estimator built on the CytoTRACE kernel; print its summary.
g_fwd = GPCCA(ctk)
print(g_fwd)
GPCCA[n=6838, kernel=<CytoTRACEKernel[dnorm=False, scheme=soft, b=10.0, nu=0.5]>]
# Schur decomposition with 20 components, then plot the spectrum
# (real_only=True).
g_fwd.compute_schur(n_components=20)
g_fwd.plot_spectrum(real_only=True)
Mat Object: 1 MPI processes type: seqdense 1.0000000000000002e+00 -1.3451811240545000e-02 -6.7646899729889012e-03 2.2948015605340516e-02 -6.5394745934908025e-03 -1.7084586751363498e-02 3.8429786583646771e-04 -3.6594000640256093e-02 -2.2549879339336918e-02 5.4377189336820564e-03 -3.8900647407905962e-02 1.5281971719723590e-02 1.1929486536118791e-02 2.8574492438253440e-02 -4.8147440921371493e-02 6.5807556367455259e-03 -5.7265620685272689e-02 2.5269023116557450e-04 -2.5595612228181620e-02 -2.5380435719810197e-02 0.0000000000000000e+00 9.9574004181986508e-01 -6.4117460866483164e-03 4.7159515370456748e-03 -1.9550522125730586e-02 2.0379591995956240e-02 2.7806638725345254e-02 -3.5513370589696426e-02 -1.6354801406212635e-02 1.3858483423501104e-03 1.1049553367956787e-02 -1.0011282480857361e-02 -2.8619389580210216e-04 8.6534741977150435e-03 -1.7615037311385560e-02 -9.0789982484496563e-03 -2.7557405991521386e-02 -5.1932883597652435e-03 -3.3778092836776142e-03 -3.2582799780006805e-02 0.0000000000000000e+00 0.0000000000000000e+00 9.7284160719528368e-01 -5.2307933295244365e-03 -1.2162888667961190e-02 -2.6052926088249252e-02 -3.6890826693933803e-03 -4.5110779862162387e-02 1.9294040432441237e-02 -8.3102498371256470e-03 2.9615169848603271e-02 -6.6925375388203305e-03 -3.2703999113389323e-02 -2.0783472886202158e-02 6.5017590716285757e-03 4.5838669648460484e-03 -1.0831436915497520e-02 -4.0557398817411315e-03 -8.0550937718316430e-03 -3.5009763190810082e-02 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 9.6910061831606420e-01 2.7755524375854889e-02 2.6789675849733494e-02 -6.7994162509397360e-03 3.2456192922237156e-02 2.9817270331512380e-02 -2.7666955079108366e-02 -3.2511418392517484e-02 1.0213869876716521e-02 3.9548406071101966e-02 4.0489926927950219e-02 2.5414150351478646e-03 -3.4383607373746586e-02 3.2350402101766859e-02 -6.7693597348953971e-03 5.5674713334840139e-03 1.3402312615516691e-02 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 
0.0000000000000000e+00 9.5204289260329766e-01 -4.4918554057079986e-02 -4.1669427117474196e-03 -2.5535031945882011e-02 -1.9913600370339395e-02 3.3688752877025249e-03 2.9282840664280822e-02 -1.5760784703654142e-02 -2.4117631980287400e-02 -3.8724153643470823e-02 -5.2059732637276838e-03 -5.3329243603247388e-02 -3.0332976070578613e-02 -8.1480110230386198e-03 1.7681479652777343e-02 -6.0860970590503379e-03 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 9.3972689724144254e-01 1.7097056957131557e-02 -7.0831126570306650e-03 8.0456831582109718e-03 -2.8631265097419059e-03 -5.7619976202328220e-02 3.8753264957167362e-02 -2.2672954015573000e-02 -3.1062045077091725e-02 -1.9799757636368136e-02 -9.3033453077914132e-03 -9.7025481001565689e-03 2.0596290181702427e-02 -2.5804791478355005e-02 4.2587427706800754e-02 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 9.2188888039407835e-01 -1.2160179910264595e-02 -3.4299498598967682e-03 -4.6277772976317270e-03 -1.0593253274222328e-02 4.9733810282430610e-02 -1.5494991171488812e-02 -6.1690851165729095e-02 4.4105060858967704e-02 -1.0494925395606597e-02 -1.1416676288946105e-02 -2.9237217985857811e-02 2.1642699355752883e-02 -1.2680685139009558e-02 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 8.8912459942365230e-01 5.1771869807878110e-02 -7.6227783478377371e-03 3.0203515254787348e-02 -9.6966200345487716e-04 2.6194583084499810e-02 -6.2221030347278218e-03 -3.8690904806113528e-02 1.0696199053845588e-02 -4.9557804073556762e-02 -1.5577761146738620e-02 -3.7109250298224615e-02 -8.4840812307487891e-03 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 
8.8837630650288957e-01 2.8427568512666881e-02 1.9413424523085653e-02 -1.6650642659850758e-02 2.6349604894633238e-02 -2.4457539695007441e-02 -3.9285441701585426e-03 2.1779799787440955e-02 -7.3345943152724635e-02 -5.2888193119274823e-03 6.3720893412754906e-03 -2.1593985521456426e-02 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 8.7605525338554635e-01 -6.0850369993118983e-03 7.4262430867135030e-04 -8.3375136485804493e-03 4.0030373493119477e-02 -1.2616204060570465e-03 3.0158112588262526e-02 3.1394633668948228e-02 1.5049039131952758e-02 7.4043416182848734e-04 -2.3725683662015565e-02 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 8.7446792672859641e-01 -1.2608261875826494e-02 -7.7200307482247696e-03 -6.8779069332632259e-03 -2.9757560444502737e-03 1.8279605958510733e-02 -7.5081817976049925e-03 -9.3567085636787565e-03 -1.0286318574928857e-02 1.3702404833830678e-02 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 8.6152382231358382e-01 -2.0224341848457915e-02 -2.7793968848346718e-02 -1.2292709033371881e-02 -1.5836899061438480e-03 -2.5610441397871934e-02 -2.0139574371903168e-02 -5.9603806709917174e-03 -6.5124902106289947e-02 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 8.5166390083617938e-01 
1.2764177206007341e-02 1.2849442359027533e-02 -1.0443052156981653e-02 -2.2091446713062176e-02 5.7796694656280113e-03 2.5282578071409465e-02 2.8398213637389279e-02 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 8.4355159080168185e-01 -3.6762184665347059e-02 1.2085562173739833e-02 -6.1836173223743900e-03 -2.3061416712557913e-02 7.4269713914275073e-02 6.9833391016684510e-02 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 8.2619225420546560e-01 -1.5555564999387247e-02 2.0191816094690274e-02 4.3877583500312643e-02 -4.9206263738627701e-02 7.4486627342706183e-02 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 8.2088019982175764e-01 7.3972437604835997e-04 2.9285641812670398e-03 7.0436284125212903e-03 6.2094564031374838e-03 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 7.9259603682705437e-01 -1.3917540101293134e-02 -3.2951670148363560e-02 
6.2918373328248053e-02 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 7.8473909946491116e-01 -9.9765157013332346e-03 2.2309345530244943e-02 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 7.8221800289165799e-01 1.1760049680517064e-02 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 7.7883893885711120e-01
# Compute 3 macrostates, using the "Cluster" annotation as cluster key, and plot them.
g_fwd.compute_macrostates(n_states=3, cluster_key="Cluster")
g_fwd.plot_macrostates(
discrete=True, legend_loc="right", size=100, basis="umap"
)
# Plot only the two macrostates that are set as terminal states below.
ax = g_fwd.plot_macrostates(
['11', '7_2'],discrete=True, legend_loc="right", size=100, basis="umap", title='Terminal states'
)
ax
# NOTE(review): exact repeat of the previous plotting call.
ax = g_fwd.plot_macrostates(
['11', '7_2'],discrete=True, legend_loc="right", size=100, basis="umap", title='Terminal states'
)
ax
# Coarse-grained transition matrix, including the initial distribution.
g_fwd.plot_coarse_T(show_initial_dist=True)
# NOTE(review): the macrostates are recomputed here with the same arguments as above.
g_fwd.compute_macrostates(n_states=3, cluster_key="Cluster")
# Macrostates "11" and "7_2" become the terminal states "11" and "7".
g_fwd.set_terminal_states_from_macrostates(names={"11": "11", "7_2": "7"}) # 7_1 has the smallest value in the coarse-grained stationary distribution, so we don't need to set it as a terminal state
# Absorption probabilities towards the terminal states, one panel per state.
g_fwd.compute_absorption_probabilities()
g_fwd.plot_absorption_probabilities(same_plot=False, size=10, basis="umap", title=['Probabilities into terminal states 11', 'Probabilities into terminal states 7'])
0%| | 0/2 [00:00<?, ?/s]
[0]PETSC ERROR: ------------------------------------------------------------------------ [0]PETSC ERROR: Caught signal number 13 Broken Pipe: Likely while reading or writing to a socket [0]PETSC ERROR: Try option -start_in_debugger or -on_error_attach_debugger [0]PETSC ERROR: or see https://petsc.org/release/faq/#valgrind [0]PETSC ERROR: or try http://valgrind.org on GNU/linux and Apple MacOS to find memory corruption errors [0]PETSC ERROR: [0]PETSC ERROR: ------------------------------------------------------------------------ [0]PETSC ERROR: Caught signal number 11 SEGV: Segmentation Violation, probably memory access out of range [0]PETSC ERROR: Try option -start_in_debugger or -on_error_attach_debugger [0]PETSC ERROR: or see https://petsc.org/release/faq/#valgrind [0]PETSC ERROR: or try http://valgrind.org on GNU/linux and Apple MacOS to find memory corruption errors [0]PETSC ERROR: configure using --with-debugging=yes, recompile, link, and run
# velocyto loom file of every sample; the sample name appears three times in
# each path.
dt_loomPath = {
    sample: f"/data/Zhaijx/liuzj/projects/singleCell/soybean/02_result/20210922/step1_cellRanger/{sample}/{sample}/velocyto/{sample}.loom"
    for sample in ("nodule_large", "nodule_small", "root")
}
# Read each loom file into its own AnnData, keyed by sample name.
dt_loom = {sample: sc.read_loom(path) for sample, path in dt_loomPath.items()}
def reformatLoomAd(ad, batch):
    """Rewrite the barcodes in ``ad.obs`` from loom style to ``<barcode>-1``.

    For every index label, the text up to and including ``"<batch>:"`` is
    removed, a trailing ``"x"`` is dropped and ``"-1"`` is appended, e.g.
    ``"root:AAACCCAx"`` becomes ``"AAACCCA-1"``.

    Parameters
    ----------
    ad
        Object whose ``obs`` attribute is a DataFrame indexed by barcode.
        Its ``obs`` is replaced in place.
    batch
        Sample name that prefixes the barcodes as ``"<batch>:"``.

    Returns
    -------
    The same ``ad`` object that was passed in.
    """

    def _rename(barcode):
        core = barcode.split(f"{batch}:")[-1]
        # Only strip the trailing "x" when it is really there; the former
        # unconditional [:-1] removed the last base of a barcode without it.
        if core.endswith("x"):
            core = core[:-1]
        return core + "-1"

    ad.obs = ad.obs.rename(index=_rename)
    return ad
# Rename the barcodes of every sample, using its dict key as the batch prefix.
dt_loom = {
    sample: reformatLoomAd(ad_sample, sample) for sample, ad_sample in dt_loom.items()
}
# Concatenate the samples; observation names get "-batch-<sample>" appended.
ad_loom = sc.concat(dt_loom, index_unique="-batch-")
# The per-sample objects are no longer needed.
del dt_loom
# %store ad_loom ad_ciz
Stored 'ad_loom' (AnnData)
# %store -r ad_loom ad_ciz
%store -r ad_loom
import scvelo as scv
import cellrank as cr
# Spliced / unspliced counts of exactly the cells in ad_ciz, in the same order.
# Indexing returns a view of ad_loom, so take a real copy before modifying it.
ad_cizForRnaVelocity = ad_loom[ad_ciz.obs.index].copy()
# Give the new object its own copy of the annotations so that columns added
# to it later cannot end up in ad_ciz.obs through a shared DataFrame.
ad_cizForRnaVelocity.obs = ad_ciz.obs.copy()
ad_cizForRnaVelocity.obsm
AxisArrays with keys:
# Carry over the embeddings / per-cell matrices of ad_ciz (the obsm of the
# loom-derived object was displayed as empty just before), then show the object.
ad_cizForRnaVelocity.obsm = ad_ciz.obsm
ad_cizForRnaVelocity
AnnData object with n_obs × n_vars = 6838 × 56826
obs: 'batch', 'n_genes', 'n_counts', 'percent_ct', 'leiden_0.0', 'leiden_0.1', 'leiden_0.2', 'leiden_0.3', 'leiden_0.4', 'leiden_0.5', 'leiden_0.6', 'leiden_0.7', 'leiden_0.8', 'leiden_0.9', 'leiden_1.0', 'leiden_1.1', 'leiden_1.2', 'leiden_1.3', 'leiden_1.4', 'leiden_1.5', 'leiden_1.6', 'leiden_1.7', 'leiden_1.8', 'leiden_1.9', 'leiden_2.0', 'leiden', 'UMI counts', 'Gene counts', 'Sample', 'leiden_R', 'Cluster', 'Cell type', '__group', 'Sample_two', 'Partition', 'UMI counts log10', 'Sample_new', 'wgcna_cluster', 'cluster_mergeUC', 'Sample_time', 'initial_size_spliced', 'initial_size_unspliced', 'initial_size'
obsm: 'X_scvi', 'X_scvi_withBatchEffect', 'X_umap', 'jaRelated_marker', 'scDblFinder', 'smDetected_auc', 'sm_auc', 'sn_wgcna_module', 'sn_wgcna_module_0605_triku10000', 'sn_wgcna_module_0605_triku10000_filtered'
layers: 'matrix', 'ambiguous', 'spliced', 'unspliced'
# scVelo preprocessing: gene filter (min_shared_counts=10), normalisation,
# selection of 1000 top genes; then the neighbour graph (30 PCs, 30
# neighbours) and the 'Ms' / 'Mu' moment layers.
scv.pp.filter_and_normalize(ad_cizForRnaVelocity, min_shared_counts=10, n_top_genes=1000)
scv.pp.moments(ad_cizForRnaVelocity, n_pcs=30, n_neighbors=30)
Filtered out 48130 genes that are detected 10 counts (shared).
Normalized count data: X, spliced, unspliced.
Extracted 1000 highly variable genes.
Logarithmized X.
computing neighbors
finished (0:00:02) --> added
'distances' and 'connectivities', weighted adjacency matrices (adata.obsp)
computing moments based on connectivities
finished (0:00:00) --> added
'Ms' and 'Mu', moments of un/spliced abundances (adata.layers)
# Fit the splicing dynamics, compute velocities in 'dynamical' mode and build
# the velocity graph.
# NOTE(review): n_jobs=64 is hard-coded for the machine this was run on.
scv.tl.recover_dynamics(ad_cizForRnaVelocity, n_jobs=64)
scv.tl.velocity(ad_cizForRnaVelocity, mode='dynamical')
scv.tl.velocity_graph(ad_cizForRnaVelocity, n_jobs=64)
recovering dynamics (using 64/64 cores)
0%| | 0/858 [00:00<?, ?gene/s]
/public/home/liuzj/softwares/anaconda3/envs/sc_py/lib/python3.8/site-packages/scvelo/tools/dynamical_model.py:713: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray. np.array([dm.alpha, dm.beta, dm.gamma, dm.pars[:3]]) / dm.m[-1] /public/home/liuzj/softwares/anaconda3/envs/sc_py/lib/python3.8/site-packages/scvelo/tools/dynamical_model.py:716: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray. np.array([dm.t, dm.tau, dm.t_, dm.pars[4]]) * dm.m[-1]
finished (0:01:06) --> added
'fit_pars', fitted parameters for splicing dynamics (adata.var)
computing velocities
finished (0:00:08) --> added
'velocity', velocity vectors for each individual cell (adata.layers)
computing velocity graph (using 64/64 cores)
0%| | 0/6838 [00:00<?, ?cells/s]
/public/home/liuzj/softwares/anaconda3/envs/sc_py/lib/python3.8/site-packages/scvelo/core/_parallelize.py:138: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray. res = np.array(res) if as_array else res
finished (0:00:13) --> added
'velocity_graph', sparse matrix with cosine correlations (adata.uns)
# %store ad_cizForRnaVelocity
Stored 'ad_cizForRnaVelocity' (AnnData)
# Reuse the "Cluster" colours of the full object `ad` on the velocity object.
# The return value is not assigned here (unlike the earlier call on ad_ciz).
_dt = singleCellTools.basic.getadataColor(ad, 'Cluster')
singleCellTools.basic.setadataColor(ad_cizForRnaVelocity, 'Cluster', _dt)
AnnData object with n_obs × n_vars = 6838 × 1000
obs: 'batch', 'n_genes', 'n_counts', 'percent_ct', 'leiden_0.0', 'leiden_0.1', 'leiden_0.2', 'leiden_0.3', 'leiden_0.4', 'leiden_0.5', 'leiden_0.6', 'leiden_0.7', 'leiden_0.8', 'leiden_0.9', 'leiden_1.0', 'leiden_1.1', 'leiden_1.2', 'leiden_1.3', 'leiden_1.4', 'leiden_1.5', 'leiden_1.6', 'leiden_1.7', 'leiden_1.8', 'leiden_1.9', 'leiden_2.0', 'leiden', 'UMI counts', 'Gene counts', 'Sample', 'leiden_R', 'Cluster', 'Cell type', '__group', 'Sample_two', 'Partition', 'UMI counts log10', 'Sample_new', 'wgcna_cluster', 'cluster_mergeUC', 'Sample_time', 'initial_size_spliced', 'initial_size_unspliced', 'initial_size', 'velocity_self_transition', 'root_cells', 'end_points', 'velocity_pseudotime'
var: 'gene_count_corr', 'means', 'dispersions', 'dispersions_norm', 'highly_variable', 'fit_r2', 'fit_alpha', 'fit_beta', 'fit_gamma', 'fit_t_', 'fit_scaling', 'fit_std_u', 'fit_std_s', 'fit_likelihood', 'fit_u0', 'fit_s0', 'fit_pval_steady', 'fit_steady_u', 'fit_steady_s', 'fit_variance', 'fit_alignment_scaling', 'velocity_genes'
uns: 'pca', 'neighbors', 'recover_dynamics', 'velocity_params', 'velocity_graph', 'velocity_graph_neg', 'Cluster_colors', 'paga', 'Cluster_sizes'
obsm: 'X_scvi', 'X_scvi_withBatchEffect', 'X_umap', 'jaRelated_marker', 'scDblFinder', 'smDetected_auc', 'sm_auc', 'sn_wgcna_module', 'sn_wgcna_module_0605_triku10000', 'sn_wgcna_module_0605_triku10000_filtered', 'X_pca', 'velocity_umap'
varm: 'PCs', 'loss'
layers: 'matrix', 'ambiguous', 'spliced', 'unspliced', 'Ms', 'Mu', 'fit_t', 'fit_tau', 'fit_tau_', 'velocity', 'velocity_u'
obsp: 'distances', 'connectivities'
# RNA-velocity stream plot on the UMAP, coloured by cluster.
scv.pl.velocity_embedding_stream(
    ad_cizForRnaVelocity,
    basis="umap",
    color="Cluster",
    title="Cluster",
    legend_loc="right",
    legend_fontsize=16,
    fontsize=18,
    smooth=0.5,
    min_mass=0,
)
# Velocity-based PAGA over the "Cluster" groups, using the 'velocity' key.
scv.tl.paga(ad_cizForRnaVelocity, groups='Cluster', vkey='velocity')
running PAGA using priors: ['velocity_pseudotime']
finished (0:00:03) --> added
'paga/connectivities', connectivities adjacency (adata.uns)
'paga/connectivities_tree', connectivities subtree (adata.uns)
'paga/transitions_confidence', velocity transitions (adata.uns)
# Draw the PAGA graph on top of the UMAP.
# NOTE(review): the recorded output reports "Invalid color key. Using grey
# instead." - no colour key is passed in this call.
scv.pl.paga(ad_cizForRnaVelocity, basis='umap', vkey='velocity', node_size_scale=1.5, min_edge_width=1, max_edge_width=5, edge_width_scale=2, use_raw=False, size=50, alpha=0.1)
WARNING: Invalid color key. Using grey instead.
Seurat = importr('Seurat')
monocle3 = importr('monocle3')
SeuratWrappers = importr('SeuratWrappers')
ggplot2 = importr('ggplot2')
def so2cds(so):
    """Convert a Seurat object into a monocle3 ``cell_data_set``.

    The object is bound to the R name ``so`` inside a temporary rpy2 local
    context. The R script then converts it with ``as.cell_data_set``, runs
    ``estimate_size_factors`` and fills the ``gene_short_name`` row metadata
    from the row names of the ``RNA`` assay.

    Parameters
    ----------
    so
        R-side Seurat object (as produced by ``ad2so`` elsewhere in this file).

    Returns
    -------
    The R object named ``cds`` fetched back from the local context.
    """
    # Same R script as a triple-quoted literal, assembled line by line.
    r_script = (
        "\n"
        "cds <- as.cell_data_set(so)\n"
        "cds <- estimate_size_factors(cds)\n"
        'cds@rowRanges@elementMetadata@listData[["gene_short_name"]] <- rownames(so[["RNA"]])\n'
    )
    with ro.local_context() as r_env:
        r_env['so'] = so
        R(r_script)
        return r_env['cds']
_ls = ["0", "7", "11"]
_ad = ad[ad.obs.eval("Sample in ['Large Nodule', 'Small Nodule'] & Cluster in @_ls")]
so_ifz = ad2so(_ad)
/public/home/liuzj/softwares/anaconda3/envs/sc_py/lib/python3.8/site-packages/rpy2/robjects/conversion.py:28: DeprecationWarning: The use of {name} in module {__name__} is deprecated. Use (__name__}.get_conversion() instead of {__name__}.converter.
warnings.warn(
WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: Warning:
WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: Feature names cannot have underscores ('_'), replacing with dashes ('-')
WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: Warning:
WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: Invalid name supplied, making object name syntactically valid. New object name is batchn_genesn_countspercent_ctleiden_0.0leiden_0.1leiden_0.2leiden_0.3leiden_0.4leiden_0.5leiden_0.6leiden_0.7leiden_0.8leiden_0.9leiden_1.0leiden_1.1leiden_1.2leiden_1.3leiden_1.4leiden_1.5leiden_1.6leiden_1.7leiden_1.8leiden_1.9leiden_2.0leidenUMI.countsGene.countsSampleleiden_RClusterCell.typeX__groupSample_twoPartitionUMI.counts.log10Sample_newwgcna_clustercluster_mergeUCSample_time; see ?make.names for more details on syntax validity
WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: Warning:
WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: Keys should be one or more alphanumeric characters followed by an underscore, setting key from scvi_withBatchEffect_ to scviwithBatchEffect_
WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: Warning:
WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: All keys should be one or more alphanumeric characters followed by an underscore '_', setting key to scviwithBatchEffect_
/public/home/liuzj/softwares/anaconda3/envs/sc_py/lib/python3.8/site-packages/rpy2/robjects/conversion.py:28: DeprecationWarning: The use of {name} in module {__name__} is deprecated. Use (__name__}.get_conversion() instead of {__name__}.converter.
warnings.warn(
WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: Warning:
WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: Adding a Graph without an assay associated with it
WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: Warning:
WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: Adding a Graph without an assay associated with it
cds_ifz = so2cds(so_ifz)
ls_cizHvg = ad_ciz.var.loc[lambda df:df['highly_variable']].index.to_list() >> F(map, lambda x:x.replace('_', '-')) >> F(list)
arR_cizHvg = R.c(*ls_cizHvg)
%%R -i cds_ifz -i arR_cizHvg
ar_umap <- reducedDim(cds_ifz, 'UMAP')
%%R
cds_ifz <- preprocess_cds(cds_ifz, num_dim = 50, use_genes=arR_cizHvg)
# cds <- preprocess_cds(cds, num_dim = 30, use_genes = lsR_hvgGene)
cds_ifz <- align_cds(cds_ifz, alignment_group = "batch")
cds_ifz <- reduce_dimension(cds_ifz)
WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: Aligning cells from different batches using Batchelor. Please remember to cite: Haghverdi L, Lun ATL, Morgan MD, Marioni JC (2018). 'Batch effects in single-cell RNA-sequencing data are corrected by matching mutual nearest neighbors.' Nat. Biotechnol., 36(5), 421-427. doi: 10.1038/nbt.4091 WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: No preprocess_method specified, and aligned coordinates have been computed previously. Using preprocess_method = 'Aligned'
%%R
cds_ifz <- cluster_cells(cds_ifz)
%%R
cds_ifz <- learn_graph(cds_ifz)
|======================================================================| 100%
%%R -w 410 -h 256
plot_cells(cds_ifz, label_groups_by_cluster=T, color_cells_by = "Cluster", group_label_size=0,
label_cell_groups=F, label_roots = F,label_leaves = F, label_branch_points =F) +
ggplot2::scale_color_manual(values=c('#1f77b4', '#b5bd61', '#98df8a')) +
theme(legend.position = 'right')
/public/home/liuzj/softwares/anaconda3/envs/sc_py/lib/python3.8/site-packages/rpy2/ipython/rmagic.py:813: DeprecationWarning: The `source` parameter emit a deprecation warning since IPython 8.0, it had no effects for a long time and will be removed in future versions. displaypub.publish_display_data(data=disp_d, source=tag,
%%R
cds_ifz <- order_cells(cds_ifz)
WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: Listening on http://127.0.0.1:6533 Error: cannot open display: localhost:25.0
%%R -w 410 -h 256
plot_cells(cds_ifz,
color_cells_by = "pseudotime",
label_cell_groups=FALSE,
label_leaves=FALSE,
label_branch_points=FALSE,
label_roots=FALSE,
graph_label_size=1.5)
sc.pl.umap(ad, color='Cluster')
sc.tl.leiden(ad, resolution=0.1, restrict_to=('Cluster', ['12']))
ax = sc.pl.umap(ad, color='leiden_R', show=False)
ad.obs['leiden_R'] = ad.obs['leiden_R'].map(lambda x:x.replace(',', '-'))
ax = sc.pl.umap(ad, color='leiden_R', show=False, legend_loc='on data')
sc.pl.umap(ad, color='leiden_R', ax=ax)
sc.pl.umap(
ad[ad.obs.eval("Cluster in ['12']")],
color="leiden_R", show=False, palette=['#EEBFC2', '#DE1E2A'], legend_loc=None,size=12
)
sns.despine(left=True, bottom=True)
plt.title('')
plt.xlabel('')
plt.ylabel('')
Text(0, 0.5, '')
ls_allDetectedSmGenes = (
pd.read_excel(
"/data/Zhaijx/liuzj/projects/singleCell/soybean/01_data/yangweicai/1-s2.0-S1673852722001242-mmc1.xlsx"
)
.dropna(subset=["Protein ID"])
.eval(
"geneID = `Protein ID`.str.replace('Glyma.', 'GLYMA_').str.split('.').str[0]",
engine="python",
)
.query("geneID in @ad.var.index")["geneID"]
.to_list()
)
singleCellTools.geneEnrichInfo.getAUCellScore(
ad,
{"sm": ls_allDetectedSmGenes},
layer="raw",
aucMaxRank=1000,
label="smDetected_auc",
)
on disk mode: False, transfer `<class 'scipy.sparse.csc.csc_matrix'>` to R: End. Elapsed time: 33
WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: Quantiles for the number of genes detected by cell: (Non-detected genes are shuffled at the end of the ranking. Keep it in mind when choosing the threshold for calculating the AUC).
min 1% 5% 10% 50% 100% 460 559 677 757 1226 3880
transfer `data.frame` to python: End. Elapsed time: 4 transfer `data.frame` to python: End. Elapsed time: 0
with plt.rc_context({"figure.figsize": (10, 6)}):
_ad = singleCellTools.plotting.obsmToObs(ad, "smDetected_auc")
sc.pl.umap(
_ad,
color=_ad.uns["plot_obsm"],
title=["Symbiosome membrane genes expression score"],
cmap="Reds",
size=10,
)
# Box plot of the symbiosome-membrane AUC score per sub-cluster, drawn as
# white boxes whose outline, whiskers and median take the sub-cluster colour.
ls_palette = ["#EEBFC2", "#DE1E2A", "#126FB0", "#B5BE62", "#A0E192"]
fig, ax = plt.subplots(figsize=(2, 3))
sns.boxplot(
    data=_ad.obs.query("Cluster in ['0', '7', '11', '12']").eval(
        "leiden_R = leiden_R.cat.set_categories(['12-0', '12-1', '0', '7', '11'])",
        engine="python",
    ),
    x="leiden_R",
    y="sm",
    fliersize=0,
)
plt.xticks(rotation=-30, ha="center")
plt.ylabel("AUC score of genes encoding\nsymbiosis membrane protein")

# Depending on the matplotlib version the box patches are exposed through
# ``ax.artists`` (older releases) or ``ax.patches`` (3.5 and later). Reading
# only ``ax.artists`` silently recolours nothing on newer versions, so fall
# back to the PathPatch objects in ``ax.patches`` when it is empty.
from matplotlib.patches import PathPatch

ls_boxes = list(ax.artists) or [x for x in ax.patches if isinstance(x, PathPatch)]
for i, (box, color) in enumerate(zip(ls_boxes, ls_palette)):
    box.set_edgecolor(color)
    box.set_facecolor("white")
    # Recolour the lines belonging to this box (whiskers, caps, median, ...).
    # NOTE(review): assumes six Line2D objects per box, as the original did.
    for j in range(6 * i, 6 * (i + 1)):
        ax.lines[j].set_color(color)
sns.despine()
plt.xlabel('')
Text(0.5, 0, '')
dt_smAuc = (
_ad.obs.query("Cluster in ['0', '7', '11', '12']")
.groupby("leiden_R")["sm"]
.agg(list)
.dropna()
.to_dict()
)
from scipy.stats import median_test
from itertools import product
# Mood's median test for every ordered pair of sub-clusters (self-pairs
# included). The p-value is computed once per pair and printed; the original
# ran the test twice and discarded the first result.
for x, y in product(dt_smAuc, repeat=2):
    p = median_test(dt_smAuc[x], dt_smAuc[y])[1]
    print(x, y, p)
0 0 1.0 0 7 0.2034484396014494 0 11 9.373972846965979e-14 0 12-0 8.224499692628447e-103 0 12-1 0.6260464254831073 7 0 0.2034484396014494 7 7 1.0 7 11 5.5428443678344805e-15 7 12-0 3.683277465427867e-122 7 12-1 0.6222062973914226 11 0 9.373972846965979e-14 11 7 5.5428443678344805e-15 11 11 1.0 11 12-0 1.0057250379484212e-150 11 12-1 0.8592595902966044 12-0 0 8.224499692628681e-103 12-0 7 3.683277465427867e-122 12-0 11 1.0057250379484212e-150 12-0 12-0 1.0 12-0 12-1 4.674072547402916e-10 12-1 0 0.6260464254831073 12-1 7 0.6222062973914226 12-1 11 0.8592595902966044 12-1 12-0 4.674072547402916e-10 12-1 12-1 1.0
median_test(dt_smAuc['0'], dt_smAuc['12-1'])
(0.23746035776330274,
0.6260464254831073,
0.10513313313313313,
array([[2434, 17],
[2431, 21]]))
# ad.write_h5ad(f"{dir_result}/ad_brief_com_version_1.h5ad")
f"{dir_result}/ad_brief_com_version_1.h5ad"
'/public/home/liuzj/projects/singleCell/soybean/02_result/20210922/analysis/noduleWithRoot//ad_brief_com_version_1.h5ad'
singleCellTools.geneEnrichInfo.calculateEnrichScoreByCellex(ad, 'raw', 'leiden_R')
ad.obs['leiden_R'].str.replace(',', '-')
AAACCCAAGACGCAGT-1-batch-nodule_large 2
AAACCCAAGAGGATCC-1-batch-nodule_large 12-0
AAACCCACAAATACAG-1-batch-nodule_large 9
AAACCCACAGCAGTAG-1-batch-nodule_large 4
AAACCCACAGCTGTAT-1-batch-nodule_large 2
...
TTTGTTGGTGTTACAC-1-batch-root 5
TTTGTTGTCAGTCCGG-1-batch-root 4
TTTGTTGTCCTCTTTC-1-batch-root 3
TTTGTTGTCGGTTGTA-1-batch-root 3
TTTGTTGTCTCATTTG-1-batch-root 6
Name: leiden_R, Length: 26712, dtype: object
_dt = {'Developping Nodule': "Developing Nodule"}
ad.obs['Sample_new'] = ad.obs['Sample_new'].map(lambda x:_dt.get(x,x))
ad.obs['Sample_new']
AAACCCAAGACGCAGT-1-batch-nodule_large Mature Nodule
AAACCCAAGAGGATCC-1-batch-nodule_large Mature Nodule
AAACCCACAAATACAG-1-batch-nodule_large Mature Nodule
AAACCCACAGCAGTAG-1-batch-nodule_large Mature Nodule
AAACCCACAGCTGTAT-1-batch-nodule_large Mature Nodule
...
TTTGTTGGTGTTACAC-1-batch-root Root
TTTGTTGTCAGTCCGG-1-batch-root Root
TTTGTTGTCCTCTTTC-1-batch-root Root
TTTGTTGTCGGTTGTA-1-batch-root Root
TTTGTTGTCTCATTTG-1-batch-root Root
Name: Sample_new, Length: 26712, dtype: category
Categories (3, object): ['Mature Nodule', 'Developing Nodule', 'Root']
import importlib
importlib.reload(singleCellTools.plotting)
<module 'jpy_tools.singleCellTools.plotting' from '/public/home/liuzj/softwares/anaconda3/lib/python3.8/site-packages/jpy_tools/singleCellTools/plotting.py'>
fig, ax = plt.subplots(figsize=(1, 2))
singleCellTools.plotting.plotLabelPercentageInCluster(
ad[ad.obs.eval("Cluster in ['12'] & Sample != 'Root'")],
"leiden_R",
"Sample_time",
ax=ax,
dt_kwargsForLegend=dict(loc="upper center", bbox_to_anchor=(0.5, -0.3), ncol=1)
)
plt.xlabel("Sub-cluster")
Text(0.5, 0, 'Sub-cluster')
fig, ax = plt.subplots(figsize=(1.5, 3))
singleCellTools.plotting.plotLabelPercentageInCluster(
ad[ad.obs.eval("Cluster in ['12'] & Sample != 'Root'")],
"leiden_R",
"Sample_new",
ax=ax,
dt_kwargsForLegend=dict(loc="upper left", bbox_to_anchor=(-0.07, -0.5), ncol=1),
)
ax.yaxis.set_major_locator(ticker.MultipleLocator(20))
plt.xlabel("Sub-cluster")
Text(0.5, 0, 'Sub-cluster')
ls_c12_1ClusterGenes = (
ad.uns["leiden_R_cellexES"]
.query("leiden_R == '12,1' & expressed_ratio > 0.2 & expressed_ratio_others < 0.01")
.sort_values("enrichScore")
.nlargest(50, "enrichScore")
.query("gene in @ls_knownSnfGenes")["gene"]
.to_list()
)
ls_c12_1ClusterGenes = (
ad.uns["leiden_R_cellexES"]
.query("leiden_R == '12,1' & expressed_ratio > 0.2 & expressed_ratio_others < 0.01")
.sort_values("enrichScore")
.nlargest(50, "enrichScore")
.query("gene in @ls_knownSnfGenes")["gene"]
.to_list()
)
ls_c12_1ClusterGenes = (
ad.uns["leiden_R_cellexES"]
.query("leiden_R == '12,1' & expressed_ratio > 0.2 & expressed_ratio_others < 0.01")
.sort_values("enrichScore")
.nlargest(50, "enrichScore")["gene"]
.to_list()
)
ad.uns["leiden_R_cellexES"].query(
"leiden_R == '12,1' & expressed_ratio > 0.2 & expressed_ratio_others < 0.01"
).nlargest(50, "enrichScore").assign(
SNF=lambda df: np.where(df["gene"].isin(ls_knownSnfGenes), "True", "")
).reset_index(drop=True).to_excel(f"{dir_result}/12_1_marker.xlsx")
df_go = enrichmentAnalysisGO(ls_c12_1ClusterGenes, ad.var.index.to_list(), dt_goToCat = dt_goToCat, qvalueCutoff=0.2, pvalueCutoff=0.05)
if df_go.empty:
print("Empty")
df_go.insert(0, 'Cluster', '12-1')
fig, ax = plt.subplots(figsize=(4, df_go.shape[0] * 0.2))
sns.barplot(data=df_go, x="-log10Pvalue", y="Description", hue = 'Category', ax=ax, dodge=False, palette=dt_goColor)
plt.legend(loc='lower left', bbox_to_anchor=[1,0])
plt.ylabel('')
sns.despine(top=True, right=True)
plt.title('12-1')
plt.show()
transfer `data.frame` to python: start transfer `data.frame` to python: End. Elapsed time: 0
from matplotlib import ticker
len([x for x in ad.var.index if x in df_nodulationRelatedGene['Soybean Gene Parsed ID'].to_list()]), len(ad.var.index)
(251, 39337)
[x for x in ls_c12SpercificGenes if x in df_nodulationRelatedGene['Soybean Gene Parsed ID'].to_list()], len(ls_c12SpercificGenes)
(['GLYMA_07G025800'], 33)
[x for x in ls_c12_1ClusterGenes if x in df_nodulationRelatedGene['Soybean Gene Parsed ID'].to_list()], len(ls_c12_1ClusterGenes)
(['GLYMA_13G093600', 'GLYMA_11G244800', 'GLYMA_06G184400', 'GLYMA_10G198700', 'GLYMA_16G177500', 'GLYMA_19G114600'], 50)
[x for x in ls_c12_0ClusterGenes if x in df_nodulationRelatedGene['Soybean Gene Parsed ID'].to_list()], len(ls_c12_0ClusterGenes)
([], 38)
# Percentage of known SNF genes in each gene set.
# Each entry is (tick label, known SNF genes in the set, size of the set);
# the counts are the ones printed by the membership checks above.
ls_snfCounts = [
    ("12-1 specific genes", 6, 50),
    ("12-0 specific genes", 0, 38),
    ("12 specific genes", 1, 33),
    ("All detected genes", 251, 39337),
]
fig, ax = plt.subplots(figsize=(2, 3))
sns.barplot(
    x=["1", "2", "3", "4"],
    y=[n_snf / n_total * 100 for _, n_snf, n_total in ls_snfCounts],
    palette=["black"],
)
plt.ylabel("Percentage of known SNF genes")
plt.ylim(0, 14)
ax.yaxis.set_major_locator(ticker.MultipleLocator(5))
sns.despine()
# Single tick-label call: the original set rotation=-90 / ha='center' and then
# immediately overrode both with rotation=-50 / ha='left'.
plt.xticks(
    [0, 1, 2, 3],
    [label for label, _, _ in ls_snfCounts],
    rotation=-50,
    ha='left',
    fontsize=10,
)
# ls_legendHandle = []
# ls_legendLabel = ["Others", "Known SNF genes"]
# ls_legendHandle.append(plt.Rectangle((0, 0), 1, 1, fc="#D3D3D3", edgecolor="none"))
# ls_legendHandle.append(plt.Rectangle((0, 0), 1, 1, fc="black", edgecolor="none"))
# plt.legend(
# ls_legendHandle[::-1],
# ls_legendLabel[::-1],
# frameon=False,
# **dict(loc="upper left", bbox_to_anchor=(-0.5, -0.6), ncol=2)
# )
(array([0, 1, 2, 3]), [Text(0, 0, '12-1 specific genes'), Text(1, 0, '12-0 specific genes'), Text(2, 0, '12 specific genes'), Text(3, 0, 'All detected genes')])
ad.var["means_ForPickMock"] = ad.to_df('normalize_log').mean()
ad.var["bins_ForPickMock"] = pd.qcut(
ad.var["means_ForPickMock"], 50, duplicates="drop"
)
import tqdm
def staticTestForModuleGeneCounts(ad, ls_gene, propotion, shuffleCounts=500):
    """Compare an observed SNF-gene proportion against shuffled background sets.

    For each ``i`` in ``range(shuffleCounts)`` a background gene list is
    requested from ``singleCellTools.geneEnrichInfo.getBgGene`` (called with
    ``usePreBin="bins_ForPickMock"``, ``seed=i``, ``replacement=False``). The
    fraction of that list found in
    ``df_nodulationRelatedGene["Soybean Gene Parsed ID"]`` is recorded. The
    fractions are drawn as a histogram with a vertical line at ``propotion``
    and an annotated empirical p-value.

    Parameters
    ----------
    ad
        Object passed through to ``getBgGene``.
    ls_gene
        Gene list passed through to ``getBgGene``.
    propotion
        Observed proportion to compare against the background fractions.
    shuffleCounts
        Number of background draws (default 500).

    Returns
    -------
    list of float
        The background fraction obtained for each draw.

    Notes
    -----
    The p-value is ``(# background fractions >= propotion) / shuffleCounts``
    and can therefore be exactly 0. Reads the module-level
    ``df_nodulationRelatedGene``.
    """
    # Build the reference set once: the original rebuilt the full ID list for
    # every gene in every shuffle.
    st_snfGenes = set(df_nodulationRelatedGene["Soybean Gene Parsed ID"].to_list())

    ls_snfPropotion = []
    for i in tqdm.tqdm(range(shuffleCounts)):
        ls_bgGenes = singleCellTools.geneEnrichInfo.getBgGene(
            ad, ls_gene, usePreBin="bins_ForPickMock", seed=i, replacement=False
        )
        n_bgSnfGenes = sum(x in st_snfGenes for x in ls_bgGenes)
        ls_snfPropotion.append(n_bgSnfGenes / len(ls_bgGenes))

    sns.histplot(ls_snfPropotion, bins=10)
    plt.axvline(propotion)
    ax = plt.gca()
    pvalue = sum(x >= propotion for x in ls_snfPropotion) / shuffleCounts
    # x is in data coordinates, y in axes coordinates (0.95 = near the top).
    plt.text(
        propotion,
        0.95,
        f"P-value = {pvalue}",
        ha="right",
        transform=ax.get_xaxis_transform(),
    )
    plt.show()
    return ls_snfPropotion
ls_c12_1ClusterGenes = (
ad.uns["leiden_R_cellexES"]
.query("leiden_R == '12,1' & expressed_ratio > 0.2 & expressed_ratio_others < 0.01")
.sort_values("enrichScore")
.nlargest(50, "enrichScore")["gene"]
.to_list()
)
ls_snfPropotion = staticTestForModuleGeneCounts(ad, ls_c12_1ClusterGenes, 0.12, 1000)
100%|██████████| 1000/1000 [01:59<00:00, 8.34it/s]
from matplotlib import ticker
# Histogram of the background SNF proportions with the observed value (0.12)
# marked by a dashed line.
sns.histplot(ls_snfPropotion, bins=[0, 0.02, 0.04, 0.06], stat='count')
plt.axvline(0.12, ls='--')
ax = plt.gca()
ax.xaxis.set_major_locator(ticker.MultipleLocator(0.02))
sns.despine()
# Raw string: "\i" is not a valid escape sequence in a normal string literal.
plt.title(r'$\it{text you want to show in italics}$')
sc.pl.correlation_matrix?
# Pie chart splitting the 50 genes (41 + 3 + 6) into the three label groups.
labels = (
    "Unknown",
    "Known SNF genes (Additional)",
    # Raw string: "\i" and "\ " are not valid escapes in a normal literal.
    r"Known SNF genes (Collected by Roy $\it{et\ al.}$)",
)
sizes = [41, 3, 6]
explode = (0, 0, 0)  # not passed to pie(); no slice is offset
fig1, ax1 = plt.subplots()
ax1.pie(sizes, labels=labels, autopct="%1.0f%%", startangle=90)
ax1.axis("equal")  # Equal aspect ratio ensures that pie is drawn as a circle.
plt.show()
# Gene IDs with the author's per-gene annotation (a number and a gene symbol)
# kept as trailing comments. As bare text after the commas they made the list
# literal a SyntaxError.
ls_c12_1ClusterGenes = [
    "GLYMA_13G093600",  # 1 SPK1
    "GLYMA_05G088400",  # 2 VPY
    "GLYMA_02G076900",  # 0 NNL1
    "GLYMA_11G244800",  # 1 NPL
    "GLYMA_06G184400",  # 1 RINRK1
    "GLYMA_10G198700",  # 1 RPG
    "GLYMA_19G146000",  # 0 SPL9d
    "GLYMA_16G177500",  # 1 CBS1
    "GLYMA_19G114600",  # 2 VPY
]
# Gene ID -> gene symbol, in plotting order. Both lists below are derived from
# this single mapping so IDs and symbols cannot drift out of step.
_dt_c12_1GeneSymbol = {
    "GLYMA_13G093600": "SPK1",
    "GLYMA_05G088400": "VPY",
    "GLYMA_02G076900": "NNL1",
    "GLYMA_11G244800": "NPL",
    "GLYMA_06G184400": "RINRK1",
    "GLYMA_10G198700": "RPG",
    "GLYMA_19G146000": "SPL9d",
    "GLYMA_16G177500": "CBS1",
    "GLYMA_19G114600": "VPY",
}
ls_c12_1ClusterGenes = list(_dt_c12_1GeneSymbol)
# Two-line labels: gene ID on the first line, symbol in parentheses below it.
ls_c12_1ClusterNames = [
    f"{gene}\n({symbol})" for gene, symbol in _dt_c12_1GeneSymbol.items()
]
axs = sc.pl.dotplot(
ad,
ls_c12_1ClusterGenes,
groupby="leiden_R",
cmap="Reds",
layer="normalize_log",
swap_axes=True,
show=False,
figsize=(5, 4),
standard_scale="var",
)
plt.sca(axs["mainplot_ax"])
plt.yticks(
range(len(ls_c12_1ClusterNames)) | F(map, lambda x: x + 0.5) | F(list),
ls_c12_1ClusterNames,
style="italic",
size=8,
)
plt.xticks(rotation=-60, ha="center", size=8)
plt.show()
from matplotlib import ticker
axs
[<AxesSubplot:title={'center':'GLYMA_13G093600\n(SPK1)'}, xlabel='UMAP1', ylabel='UMAP2'>,
<AxesSubplot:title={'center':'GLYMA_05G088400\n(VPY)'}, xlabel='UMAP1', ylabel='UMAP2'>,
<AxesSubplot:title={'center':'GLYMA_02G076900\n(NNL1)'}, xlabel='UMAP1', ylabel='UMAP2'>,
<AxesSubplot:title={'center':'GLYMA_11G244800\n(NPL)'}, xlabel='UMAP1', ylabel='UMAP2'>,
<AxesSubplot:title={'center':'GLYMA_06G184400\n(RINRK1)'}, xlabel='UMAP1', ylabel='UMAP2'>,
<AxesSubplot:title={'center':'GLYMA_10G198700\n(RPG)'}, xlabel='UMAP1', ylabel='UMAP2'>,
<AxesSubplot:title={'center':'GLYMA_19G146000\n(SPL9d)'}, xlabel='UMAP1', ylabel='UMAP2'>,
<AxesSubplot:title={'center':'GLYMA_16G177500\n(CBS1)'}, xlabel='UMAP1', ylabel='UMAP2'>,
<AxesSubplot:title={'center':'GLYMA_19G114600\n(VPY)'}, xlabel='UMAP1', ylabel='UMAP2'>]
with plt.rc_context({"figure.figsize": (4, 4)}):
axs = sc.pl.umap(
ad[ad.obs.eval("Cluster == '12'")],
color=ls_c12_1ClusterGenes,
title=ls_c12_1ClusterNames,
cmap="Reds",
layer="normalize_log",
ncols=5,
# size=15,
show=False
)
for ax in axs:
plt.sca(ax)
# ax.xaxis.set_major_locator(ticker.MultipleLocator(2))
# ax.yaxis.set_major_locator(ticker.MultipleLocator(2))
plt.xlim(8,13)
plt.ylim(8,15)
axs = sc.pl.umap(
ad[ad.obs.eval("Cluster == '12'")],
color=ls_c12_1ClusterGenes,
title=ls_c12_1ClusterNames,
cmap="Reds",
layer="normalize_log",
ncols=5,
# size=15,
show=False
)
for ax in axs:
plt.sca(ax)
# ax.xaxis.set_major_locator(ticker.MultipleLocator(2))
# ax.yaxis.set_major_locator(ticker.MultipleLocator(2))
plt.xlim(8,13)
plt.ylim(8,15)
df_cellexEs = ad.uns["leiden_R_cellexES"].query("leiden_R == '12,1'")
ls_c12_1ClusterGenes = (
ad.uns["leiden_R_cellexES"]
.query("leiden_R == '12,1' & expressed_ratio > 0.2 & expressed_ratio_others < 0.01")
.sort_values("enrichScore")
.nlargest(50, "enrichScore")["gene"]
.to_list()
)
ls_c12_0ClusterGenes = (
ad.uns["leiden_R_cellexES"]
.query("leiden_R == '12,0' & expressed_ratio > 0.2 & expressed_ratio_others < 0.01")
.sort_values("enrichScore")
.nlargest(50, "enrichScore")["gene"]
.to_list()
)
# Build an AnnData (samples x genes) from the FPKM table: the first seven
# columns are used, with the 'gene' column as index.
df_rhRpkm = pd.read_excel("/data/Zhaijx/liuzj/projects/singleCell/soybean/01_data/bulkRHData/Soybean RH _FPKM.xls")
ad_rh = sc.AnnData(df_rhRpkm.iloc[:, :7].set_index('gene')).T
# Rename gene IDs from the 'Glyma.' prefix to the 'GLYMA_' prefix.
ad_rh.var = ad_rh.var.rename(index=lambda sr: sr.replace('Glyma.', 'GLYMA_'))
ad_rh.X = np.log1p(ad_rh.X)
# Sample annotations are sliced out of the sample name: chars 0-1 and 2-3.
ad_rh.obs = ad_rh.obs.assign(id=lambda x: x.index, time=lambda x: x.index.str[:2], treat=lambda x: x.index.str[2:4])
# Drop genes whose summed value over all samples is zero.
ad_rh = ad_rh[:, ad_rh.X.sum(0) > 0]
_ls = ['12IN_RH', '12UN_RH', '24IN_RH', '24UN_RH', '48IN_RH', '48UN_RH']
# Reorder so that the three "IN" samples come first, then the three "UN" ones.
_ls = [_ls[x] for x in [0, 2, 4, 1, 3, 5]]
# Copy the subset: assigning an obs column on a view makes anndata warn
# ("Trying to set attribute `.obs` of view, copying.") and copy implicitly.
ad_rh = ad_rh[_ls].copy()
ad_rh.obs['id'] = ad_rh.obs['id'].astype('category').cat.set_categories(_ls)
Trying to set attribute `.obs` of view, copying.
# ad_rh = ad_rh[[0,1,3,4]]
ad_rh.layers['X'] = ad_rh.X
singleCellTools.plotting.clustermap(
ad_rh,
{"a": [x for x in ls_c12_1ClusterGenes if x in ad_rh.var.index]},
obsAnno="id",
layer="X",
add_gene_name=False,
col_cluster=True,
dendrogram_ratio=0.1,
figsize=(6, 5), standard_scale =1
)
Trying to set attribute `.obs` of view, copying.
<seaborn.matrix.ClusterGrid at 0x2b875bd05ee0>
ls_c12_0ClusterGenes = (
ad.uns["leiden_R_cellexES"]
.query("leiden_R == '11' & expressed_ratio > 0.2 & expressed_ratio_others < 0.01")
.sort_values("enrichScore")
.nlargest(50, "enrichScore")["gene"]
.to_list()
)
singleCellTools.plotting.clustermap(
ad_rh,
{"a": [x for x in ls_c12_0ClusterGenes if x in ad_rh.var.index]},
obsAnno="id",
layer="X",
add_gene_name=False,
col_cluster=True,
dendrogram_ratio=0.1, standard_scale =1,
figsize=(3, 5),
)
Trying to set attribute `.obs` of view, copying.
<seaborn.matrix.ClusterGrid at 0x2b875f2ed670>
# Load the raw Cell Ranger matrices of the three samples and keep only the
# barcodes present in `ad`.
dir_resultWithRhizobium = "/public/home/liuzj/projects/singleCell/soybean/02_result/20220916_rhizobium_and_soybean_cellranger/step1_cellRanger/"
ls_sample = ['nodule_large', 'nodule_small', 'root']
ls_cellrangerH5 = [f"{dir_resultWithRhizobium}/{x}/{x}/outs/raw_feature_bc_matrix.h5" for x in ls_sample]
ls_ad = [sc.read_10x_h5(x) for x in ls_cellrangerH5]
ad_rhi = sc.concat(ls_ad, label='batch', keys=ls_sample, index_unique='-batch-')
# Copy the subset: writing to `.var` of a view raises
# ImplicitModificationWarning and forces an implicit copy anyway.
ad_rhi = ad_rhi[ad.obs.index].copy()
# The part of each feature name before the first '_' is stored as 'specie'.
ad_rhi.var['specie'] = ad_rhi.var.index.str.split('_').str[0]
/tmp/ipykernel_214833/901161390.py:1: ImplicitModificationWarning: Trying to modify attribute `.var` of view, initializing view as actual.
ad_rhi.var['specie'] = ad_rhi.var.index.str.split('_').str[0]
ad_rhi.obs['Soybean_umis'] = ad_rhi[:, ad_rhi.var['specie'] == 'soybean'].X.sum(1).A1
ad_rhi.obs['Rhizobium_umis'] = ad_rhi[:, ad_rhi.var['specie'] == 'rhizobium'].X.sum(1).A1
for batch, _df in ad_rhi.obs.groupby('batch'):
break
for batch, _df in ad_rhi.obs.groupby('batch'):
sns.jointplot(
data=_df, x='Soybean_umis', y='Rhizobium_umis', xlim=(-100, 6100), ylim=(-100, 6100),
marginal_kws=dict(bins=np.linspace(0, 6000, 101)),
joint_kws=dict(alpha=0.01)
)
plt.show()
def addDensityToDf(df:pd.DataFrame, x:str, y:str, group=None, bins=20):
    """Return a copy of ``df`` with a 2-D point density column, sorted by density.

    The density of every ``(df[x], df[y])`` point is stored in a new column
    ``temp_density`` and the rows are sorted by it in ascending order.

    Parameters
    ----------
    df
        Input table. It is not modified; the original implementation wrote
        ``temp_density`` into the caller's frame in the non-group branches.
    x, y
        Names of the two numeric columns.
    group
        Optional ``groupby`` key. When given, the density is computed
        separately inside each group and the per-group results are
        concatenated.
    bins
        ``None`` evaluates a ``scipy.stats.gaussian_kde`` at every point.
        Otherwise the value is passed as ``bins`` to ``np.histogram2d`` and the
        density is interpolated at every point from the bin centres with
        ``scipy.interpolate.interpn(method="splinef2d")``; points where the
        interpolation yields NaN get density 0.

    Returns
    -------
    pd.DataFrame
        New frame with the extra ``temp_density`` column.
    """
    from scipy.interpolate import interpn
    from scipy.stats import gaussian_kde

    if group:
        # Recurse per group without `group`, then drop the outer index level
        # that groupby/apply adds.
        return (
            df.groupby(group, as_index=False)
            .apply(lambda df: addDensityToDf(df, x=x, y=y, bins=bins))
            .reset_index(level=0, drop=True)
        )

    ar_x = df[x].values
    ar_y = df[y].values
    if bins is None:
        ar_xy = np.vstack([ar_x, ar_y])
        ar_density = gaussian_kde(ar_xy)(ar_xy)
    else:
        data, x_e, y_e = np.histogram2d(ar_x, ar_y, bins=bins, density=True)
        ar_density = interpn(
            (0.5 * (x_e[1:] + x_e[:-1]), 0.5 * (y_e[1:] + y_e[:-1])),
            data,
            np.vstack([ar_x, ar_y]).T,
            method="splinef2d",
            bounds_error=False,
        )
        ar_density[np.isnan(ar_density)] = 0.0

    # `assign` builds a new frame, so the caller's DataFrame stays untouched.
    return df.assign(temp_density=ar_density).sort_values('temp_density')
df = addDensityToDf(ad_rhi.obs.eval("Soybean_umis = Soybean_umis \n Rhizobium_umis = Rhizobium_umis"), x='Soybean_umis', y='Rhizobium_umis', group='batch', bins=None)
# One density-coloured scatter (soybean vs rhizobium UMI counts) per batch,
# each in its own subfigure, plus a narrow fourth subfigure for the colour bar.
from matplotlib.colors import Normalize
from matplotlib import cm

_dt = {'nodule_large': "Nodule (21 dpi)", "nodule_small": "Nodule (7 dpi)", "root": "Root"}
fig = plt.Figure(figsize=(12, 4))
ls_subfig = fig.subfigures(1, 4, width_ratios=(1,1,1,0.05))
for (batch, _df), subfig in zip(df.groupby('batch'), ls_subfig):
    (
        so.Plot(_df, x='Soybean_umis', y='Rhizobium_umis', color='temp_density')
        .add(so.Dot(fill=True, edgewidth=0, pointsize=3, alpha=1), legend=False)
        .scale(color='viridis')
        # .scale(y='log', x='log', color='viridis')
        .limit(x=(-100,6100), y=(-100,6100))
        .layout(size=(5,4))
        .label(title=_dt[batch], x='Soybean UMI counts', y='Rhizobium UMI counts')
        .theme(dt_snsStyle)
        .on(subfig).plot()
    )
ax = ls_subfig[-1].add_subplot(111)
# NOTE(review): `_df` is the last batch from the loop above, so the norm is
# taken from that batch only; ticks are removed below, so the bar is
# qualitative.
norm = Normalize(vmin = np.min(_df['temp_density']), vmax = np.max(_df['temp_density']))
cbar = ls_subfig[-1].colorbar(cm.ScalarMappable(norm = norm), cax=ax)
cbar.set_ticks([])
cbar.ax.set_ylabel('Density')
# `plt.tight_layout()` was dropped: `fig` is a bare `plt.Figure` that pyplot
# does not manage, so the call acted on a freshly created empty figure
# (the stray "<Figure size 432x288 with 0 Axes>" output) and left `fig` as is.
fig
<Figure size 432x288 with 0 Axes>
df_araMarker = pd.read_table("/data/Zhaijx/liuzj/projects/singleCell/soybean/01_data/benfey.marker.csv")
df_araMarkerNoStage = pd.read_table("/data/Zhaijx/liuzj/projects/singleCell/soybean/01_data/benfey.marker.without.stage.csv")
dt_arab2soybean = pd.read_table(
"/public/home/liuzj/data/ortholog/plant/parsed/1v1/arab__v__soybase_2.1v1.tsv"
).set_index("arab")["soybase_2"].to_dict()
dt_araMarkerOth = (
df_araMarker.eval("soybean_gene = gene.map(@dt_arab2soybean)").query("soybean_gene in @ad.var.index")
.dropna(subset=["soybean_gene"]).groupby('cell type + dev stage group')['soybean_gene']
.apply(lambda x: x.tolist()).to_dict()
)
dt_araMarkerOthNoStage = (
df_araMarkerNoStage.eval("soybean_gene = gene.map(@dt_arab2soybean)").query("soybean_gene in @ad.var.index")
.dropna(subset=["soybean_gene"]).groupby('cell type group')['soybean_gene']
.apply(lambda x: x.tolist()).to_dict()
)
singleCellTools.geneEnrichInfo.getAUCellScore(ad, dt_araMarkerOthNoStage, layer='raw', threads=12, label='araMarkerOrthlogNoStage_auc')
Create regulons from a dataframe of enriched features. Additional columns saved: []
singleCellTools.geneEnrichInfo.getAUCellScore(ad, dt_araMarkerOth, layer='raw', threads=12, label='araMarkerOrthlog_auc')
Create regulons from a dataframe of enriched features. Additional columns saved: []
_ad = singleCellTools.plotting.obsmToObs(ad, 'araMarkerOrthlog_auc')
sc.pl.umap(_ad, color=_ad.uns['plot_obsm'], cmap='Reds')
ad_stele = ad[ad.obs.eval("Cluster in ['3', '9']")]
_ad = singleCellTools.plotting.obsmToObs(ad_stele, 'araMarkerOrthlog_auc')
sc.pl.umap(_ad, color=_ad.uns['plot_obsm'], cmap='Reds', ncols=6)
_ad = singleCellTools.plotting.obsmToObs(ad_stele, 'araMarkerOrthlogNoStage_auc')
sc.pl.umap(_ad, color=_ad.uns['plot_obsm'], cmap='Reds', ncols=5)
import scvi
ad_ara = sc.read_h5ad('/data/Zhaijx/qinyw/data/flsnRNA/GSE152766_Root/GSE152766_Root_Atlas.h5ad')
sc.pl.embedding(ad_ara, 'umap_2D_integrated', color=['celltype.anno', 'time.anno'], wspace=0.5)
singleCellTools.basic.initLayer(ad_ara, layer='RNA_counts')
ad_ara = ad_ara[:, ad_ara.var.index.isin(list(dt_arab2soybean.keys()))]
ad_ara.var.index = ad_ara.var.index.map(dt_arab2soybean)
ad_ara.obs['specie'] = 'arabidopsis'
ad_araSteleMature = ad_ara[ad_ara.obs.eval(
"`celltype.anno` in ['Metaphloem & Companion Cell', 'Protophloem', 'Procambium', 'Xylem Pole Pericycle', 'Phloem Pole Pericycle', 'Protoxylem', 'Metaxylem'] & `time.anno` in ['Maturation']"
)]
sc.pl.embedding(ad_araSteleMature, 'umap_2D_integrated', color=['celltype.anno', 'time.anno'], wspace=0.5)
toPkl(ad_stele, 'ad_stele', 'scem')
toPkl(ad_araSteleMature, 'ad_araSteleMature', 'scem')
2022-11-02 19:44:36.388 | INFO | jpy_tools.otherTools:toPkl:477 - please run `loadPkl('ad_stele', lambda **dt:sc.read_h5ad(**dt), arg_path='filename')` to get object
2022-11-02 19:45:23.698 | INFO | jpy_tools.otherTools:toPkl:477 - please run `loadPkl('ad_araSteleMature', lambda **dt:sc.read_h5ad(**dt), arg_path='filename')` to get object
0 0
# On SCEM
# ad_araSteleMature = loadPkl('ad_araSteleMature', lambda **dt:sc.read_h5ad(**dt), arg_path='filename')
# ad_stele = loadPkl('ad_stele', lambda **dt:sc.read_h5ad(**dt), arg_path='filename')
# ad_araSteleMature.obs['batch'] = 'root'
# ad_stele.obs['orig.ident'] = 'soybean_' + ad_stele.obs['batch'].astype(str)
# ad_stele.obs['specie'] = 'soybean'
# ad_steleScanviMerged = singleCellTools.annotation.labelTransferByScanvi(
# ad_araSteleMature, 'celltype.anno', 'raw', ad_stele, 'raw', ls_removeCateKey=["orig.ident", "specie", "batch"], mode="merge",
# batch_size_ref=2**8,
# batch_size_query=2**8,
# dt_params2SCVIModel = {'n_layers': 5},
# dt_params2SCANVIModel = {'n_layers':5},
# n_top_genes=3000, needLoc=True,
# hvgBatch='specie')
# toPkl(ad_steleScanviMerged, 'ad_steleScanviMerged', 'ipf')
# toPkl(ad_stele, 'ad_stele', 'ipf')
ad_steleScanviMerged = loadPkl('ad_steleScanviMerged', lambda **dt:sc.read_h5ad(**dt), arg_path='filename')
ad_stele = loadPkl('ad_stele', lambda **dt:sc.read_h5ad(**dt), arg_path='filename')
_ls_order = [ 'Procambium', 'Protophloem', 'Protoxylem', 'Metaphloem & companion cell', 'Phloem pole pericycle','Xylem pole pericycle', 'Unknown']
ad_stele.obs['labelTransfer_scanvi_celltype.anno'] = ad_stele.obs['labelTransfer_scanvi_celltype.anno'].str.capitalize().astype('category').cat.set_categories(_ls_order)
ad_steleScanviMerged.obs['labelTransfer_scanvi_celltype.anno'] = ad_steleScanviMerged.obs['labelTransfer_scanvi_celltype.anno'].str.capitalize().astype('category').cat.set_categories(_ls_order)
def replaceSteleLabel(line):
    """Pick the label-transfer annotation for one ``obs`` row.

    Meant for ``DataFrame.apply(..., axis=1)``: ``line`` is a row whose
    ``name`` is the cell barcode. If that barcode is in ``ad_stele.obs`` the
    value stored there is returned; otherwise the row keeps its own value.
    Reads the module-level ``ad_stele``.
    """
    label_col = 'labelTransfer_scanvi_celltype.anno'
    if line.name not in ad_stele.obs.index:
        return line.loc[label_col]
    return ad_stele.obs.loc[line.name, label_col]
ad_steleScanviMerged.obs['labelTransfer_scanvi_celltype.anno'] = ad_steleScanviMerged.obs.apply(replaceSteleLabel, axis=1).astype('category').cat.set_categories(_ls_order)
ad_steleScanviMerged.uns['labelTransfer_scanvi_celltype.anno_colors'][-1] = '#7F7F7F'
ax = sc.pl.umap(ad_steleScanviMerged, color='specie', show=False, title='Specie')
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles, ['Arabidopsis', 'Soybean'], loc='upper center', bbox_to_anchor=(0.5, 0), frameon=False, ncol=2)
plt.xlabel('')
plt.ylabel('')
plt.show()
ax = sc.pl.umap(ad_steleScanviMerged, color='labelTransfer_scanvi_celltype.anno', show=False, title='Label transfer results')
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles, labels, loc='upper center', bbox_to_anchor=(0.5, 0), frameon=False, ncol=2)
plt.xlabel('')
plt.ylabel('')
plt.show()
sc.pl.umap(ad_steleScanviMerged, color=['celltype.anno', 'specie', 'labelTransfer_scanvi_celltype.anno'], wspace=0.45, ncols=2)
ad_stele.uns['labelTransfer_scanvi_celltype.anno_colors'] = ad_steleScanviMerged.uns['labelTransfer_scanvi_celltype.anno_colors']
ad_stele.obsm['X_scANVI'] = ad_steleScanviMerged[ad_stele.obs.index].obsm['X_scANVI']
sc.pp.neighbors(ad_stele, 5, use_rep='X_scANVI')
sc.tl.umap(ad_stele, 0.3)
ax = sc.pl.umap(ad_stele, color='labelTransfer_scanvi_celltype.anno', show=False, title="Label transfer results\n(Soybean only)")
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles, labels, loc='upper center', bbox_to_anchor=(0.55, 0), frameon=False, ncol=2)
plt.xlabel('')
plt.ylabel('')
plt.show()
ax = sc.pl.umap(ad_stele, color='Sample_time', show=False, title='Sample')
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles, labels, loc='upper center', bbox_to_anchor=(0.55, 0), frameon=False, ncol=3)
plt.xlabel('')
plt.ylabel('')
plt.show()
sc.tl.leiden(ad_stele, 0.2)
sc.pl.umap(ad_stele, color='leiden', title="Sub-cluster of vascular bundle", show=False)
plt.xlabel('')
plt.ylabel('')
plt.show()
import matplotlib.legend as mlegend
# Plot the share of each transferred label within every leiden cluster, then rebuild the
# legend with one entry removed.
singleCellTools.plotting.plotLabelPercentageInCluster(ad_stele, 'leiden', 'labelTransfer_scanvi_celltype.anno')
ax = plt.gca()
# Remove Metaxylem: its percentage is empty and it is not present in mature roots.
leg = ax.get_legend()
# _get_legend_handles is a private matplotlib helper (leading underscore), so it may change
# between matplotlib versions.
ls_handles = mlegend._get_legend_handles([ax]) >> F(list)
ls_labels = leg.get_texts() >> F(map, lambda x:x.get_text()) >> F(list)
# Keep as many trailing handles as there are legend texts, in reversed order.
ls_handles = ls_handles[-len(ls_labels):][::-1]
# NOTE(review): position 5 is hard-coded; presumably it is the Metaxylem entry - verify
# against the label order before reuse.
ls_handles.pop(5)
ls_labels.pop(5)
plt.legend(ls_handles, ls_labels, loc='upper left', bbox_to_anchor=(1, 1), frameon=False)
plt.show()
# Normalise the column names of the AUC table: str.lower followed by str.capitalize is mapped
# over the names through the project's F pipe helper.
ad_stele.obsm['araMarkerOrthlogNoStage_auc'].columns = ad_stele.obsm['araMarkerOrthlogNoStage_auc'].columns >> F(map, str.lower) >> F(map, str.capitalize) >> F(list)
# Bar plot of 'AUC score' per leiden cluster, dodged by 'Cell type', with 99% CI ranges.
# NOTE(review): _df is not defined in the visible part of the file - presumably a long-format
# table with the columns 'leiden', 'AUC score' and 'Cell type'; confirm where it is built.
(
so.Plot(_df, x='leiden', y='AUC score', color='Cell type')
.add(so.Bar(edgewidth=0), so.Agg(), so.Dodge())
.add(so.Range(), so.Est(errorbar=('ci', 99)), so.Dodge(), legend=False)
.theme(dt_snsStyle)
.label(x='Cluster')
.layout(size=(8,4))
.scale(color='deep')
)
# NOTE(review): obsmToObs presumably exposes the columns of the given .obsm table as .obs
# columns so they can be used as UMAP colour keys - verify in jpy_tools.
_ad = singleCellTools.plotting.obsmToObs(ad_stele, 'araMarkerOrthlogNoStage_auc')
axs = sc.pl.umap(
    _ad,
    color=_ls_order[:-1],  # every entry of _ls_order except the last one
    cmap='Reds',
    ncols=3,
    vmax=0.15,
    show=False,
)
# Re-apply each panel's own title with a larger font.
for ax in axs:
    plt.sca(ax)
    plt.title(ax.get_title(), fontdict={'fontsize': 18})
# Manual annotation of the vascular-bundle sub-clusters; the list position is the cluster id.
_ls_steleClusterNames = [
    'Xylem pole pericycle',
    "Stele (Unknown)",
    "Stele (Unknown, phloem like)",
    "Stele (Unknown)",
    "Stele (Unknown, xylem like)",
    "Phloem pole pericycle",
    "Metaphloem & companion cell",
]
# Keys are the cluster ids as strings; values carry a "vb-<id>: " prefix.
dt_annoStele = {
    str(_cluster_id): f"vb-{_cluster_id}: {_cluster_name}"
    for _cluster_id, _cluster_name in enumerate(_ls_steleClusterNames)
}
# Translate each cell's leiden id into its "vb-<id>: <name>" annotation via dt_annoStele.
ad_stele.obs['vb_ct'] = ad_stele.obs['leiden'].map(dt_annoStele)
# UMAP of the leiden sub-clusters; legend entries get a "vb-" prefix and sit right of the panel.
ax = sc.pl.umap(ad_stele, color='leiden', wspace=0.2, show=False, title='Sub-cluster of vascular bundle')
handle, labels = ax.get_legend_handles_labels()
labels = [f"vb-{x}" for x in labels]
# plt.xlabel / plt.ylabel act on the current axes; blank out both axis labels.
plt.xlabel('')
plt.ylabel('')
# Kept as the last statement: its return value is what the notebook echoes.
ax.legend(handle, labels, loc='center left', bbox_to_anchor=(1, 0.5), frameon=False, ncol=1)
<matplotlib.legend.Legend at 0x2baf15aaff40>
# Same UMAP coloured by the full 'vb_ct' annotation; legend goes below the panel in two columns.
ax = sc.pl.umap(ad_stele, color='vb_ct', wspace=0.2, show=False, title='Sub-cluster of vascular bundle')
handle, labels = ax.get_legend_handles_labels()
# Formats every label through an f-string (i.e. str() of each entry).
labels = [f"{x}" for x in labels]
plt.xlabel('')
plt.ylabel('')
# Kept as the last statement: its return value is what the notebook echoes.
ax.legend(handle, labels, loc='upper center', bbox_to_anchor=(0.5, 0), frameon=False, ncol=2)
<matplotlib.legend.Legend at 0x2bb5cb7b38b0>
# Start from the global 'Cell type' labels (as plain strings), overwrite the stele cells with
# their vascular-bundle annotation, and store the result as a categorical column.
_sr_vbct = ad.obs['Cell type'].copy().astype(str)
_sr_vbct.loc[ad_stele.obs.index] = ad_stele.obs['vb_ct'].astype(str)
ad.obs['vb_ct'] = _sr_vbct.astype('category')

# The table read below is taken from ad.uns['vb_ct_cellexES'].
# NOTE(review): presumably written by this call - verify in jpy_tools.
singleCellTools.geneEnrichInfo.calculateEnrichScoreByCellex(ad, 'raw', 'vb_ct')

# Keep enriched, sufficiently expressed genes of the 'vb*' groups only.
_query_steleSpecific = (
    "enrichScore > 0.75 & expressed_ratio > 0.1 & expressed_ratio / expressed_ratio_others > 2 & vb_ct.str.startswith('vb')"
)
df_steleSpecGeneAllCluster = ad.uns['vb_ct_cellexES'].query(_query_steleSpecific)


def _top5GenesByEnrichScore(df_group):
    # Gene ids of the five rows with the largest enrichScore within one vb_ct group.
    return df_group.nlargest(5, 'enrichScore')["gene"].to_list()


dt_steleSpecGeneAllCluster = (
    df_steleSpecGeneAllCluster
    .groupby('vb_ct')
    .apply(_top5GenesByEnrichScore)
    .to_dict()
)
# Display order of the 'vb_ct' categories: the whole-dataset cell-type labels first,
# followed by the vascular-bundle sub-clusters.
_ls_cellTypeOrder = [
    "0: Uninfected cells*",
    "1: Inner cortex",
    "2: Outer cortex*",
    "4: Outer cortex*",
    "5: Epidermis",
    "6: Unknown",
    "7: Uninfected cells*",
    "8: Endodermis",
    "10: Unknown",
    "11: Uninfected cells*",
    "12: Infected cells",
    "13: Unknown",
    "14: Unknown",
]
_ls_vbOrder = [
    "vb-0: Xylem pole pericycle",
    "vb-1: Stele (Unknown)",
    "vb-2: Stele (Unknown, phloem like)",
    "vb-3: Stele (Unknown)",
    "vb-4: Stele (Unknown, xylem like)",
    "vb-5: Phloem pole pericycle",
    "vb-6: Metaphloem & companion cell",
]
ls_vbctOrder = _ls_cellTypeOrder + _ls_vbOrder
# Pin the category order used on the dot plot's group axis.
ad.obs['vb_ct'] = ad.obs['vb_ct'].cat.set_categories(ls_vbctOrder)

# Marker genes per vascular-bundle cluster, keyed by the short "vb-<id>" prefix; groups with
# an empty gene list and non-'vb' groups are skipped.
_dt_vbMarkerGenes = {
    _group.split(':')[0]: _genes
    for _group, _genes in dt_steleSpecGeneAllCluster.items()
    if _genes and _group.startswith('vb')
}
sc.pl.dotplot(
    ad,
    var_names=_dt_vbMarkerGenes,
    groupby="vb_ct",
    layer="normalize_log",
    cmap="Reds",
    standard_scale='var',
    dot_max=0.4,
    figsize=(16, 5),
)
# Write the filtered gene table as-is (dir_result is defined outside this chunk).
df_steleSpecGeneAllCluster.to_excel(f"{dir_result}/vascular_bundle.xlsx")
# Second export: left-join df_symbol (defined outside this chunk) on the gene id, which is
# matched against df_symbol's index, and replace missing Symbol / other_designations values
# with empty strings before writing.
df_steleSpecGeneAllCluster.merge(df_symbol, how="left", left_on="gene", right_index=True).eval(
"Symbol = Symbol.fillna('') \n other_designations = other_designations.fillna('')",
engine="python",
).to_excel(f"{dir_result}/stele_spec_genes.xlsx")
import glob
import re
# Load every "Gma.<category>.pairs" table and tag its rows with the category parsed from the
# file name.
lsDf_geneDup = []
for f in glob.glob("/public/home/liuzj/projects/singleCell/soybean/01_data/pdgd/*.pairs"):
    df = pd.read_table(f, sep='\t')
    # Dots are escaped so that they only match the literal separators of the file name.
    df['Category'] = re.search(r'Gma\.(\w+)\.pairs', f).group(1)
    lsDf_geneDup.append(df)
df_geneDup = pd.concat(lsDf_geneDup)

# Rows of the 'transposed' category take their gene pair from the 'Transposed' / 'Parental'
# columns. The concatenated index is not unique, so values are assigned positionally
# instead of relying on index alignment.
_is_transposed = df_geneDup['Category'] == 'transposed'
df_geneDup.loc[_is_transposed, 'Duplicate 1'] = df_geneDup.loc[_is_transposed, 'Transposed'].to_numpy()
df_geneDup.loc[_is_transposed, 'Duplicate 2'] = df_geneDup.loc[_is_transposed, 'Parental'].to_numpy()

# "Glyma.08G071200.1" -> "GLYMA_08G071200": literal prefix replacement (regex=False makes
# the result independent of the pandas default), then drop everything after the next dot.
df_geneDup['Gene 1'] = df_geneDup['Duplicate 1'].str.replace("Glyma.", "GLYMA_", regex=False).str.split('.').str[0]
df_geneDup['Gene 2'] = df_geneDup['Duplicate 2'].str.replace("Glyma.", "GLYMA_", regex=False).str.split('.').str[0]

# Keep only the pairs for which both genes are present in ad.var.
_idx_knownGenes = ad.var.index
df_geneDup = df_geneDup.loc[
    df_geneDup['Gene 1'].isin(_idx_knownGenes) & df_geneDup['Gene 2'].isin(_idx_knownGenes)
]

# Spot check: pairs involving either of two genes of interest (echoed by the notebook).
_ls = ['GLYMA_08G071200', 'GLYMA_13G043800']
df_geneDup.query("`Gene 1` in @_ls | `Gene 2` in @_ls")
| Duplicate 1 | Location | Duplicate 2 | Location.1 | E-value | Category | Transposed | Parental | Gene 1 | Gene 2 | |
|---|---|---|---|---|---|---|---|---|---|---|
| 14311 | Glyma.08G071200.1 | Gma-Chr08:5438763 | Glyma.13G043800.1 | Gma-Chr13:13707907 | 0.0 | dispersed | NaN | NaN | GLYMA_08G071200 | GLYMA_13G043800 |
# NOTE(review): _mergeData is a private jpy_tools helper; presumably it aggregates the 'raw'
# layer per 'Cluster' into one observation per cluster - verify.
ad_merged = singleCellTools.geneEnrichInfo._mergeData(ad, 'Cluster', layer='raw')
# NOTE(review): presumably creates the normalised layers (including the 'normalize_log' layer
# read below) scaled to a total of 1e6 per observation - verify.
singleCellTools.basic.initLayer(ad_merged, total=1e6)
from scipy import stats
def getSpearmanR(sr, ad, layer, gene2='Gene 2', gene1='Gene 1'):
    """
    Spearman correlation between the two genes named in one row of a gene-pair table.

    Designed for ``DataFrame.apply(getSpearmanR, axis=1, ad=..., layer=...)``.

    Parameters
    ----------
    sr : pd.Series
        One row of the pair table; must contain the labels given by ``gene1`` and ``gene2``.
    ad : AnnData-like
        Object supporting ``ad[:, gene].layers[layer]``.
    layer : str
        Name of the layer holding the expression values.
    gene2 : str
        Label in ``sr`` holding the second gene id.
    gene1 : str
        Label in ``sr`` holding the first gene id.

    Returns
    -------
    float
        The Spearman rank correlation coefficient.
    """

    def _flatExpression(gene):
        # Layers may be dense or scipy-sparse; sparse matrices cannot be reshaped to 1-D.
        mat = ad[:, gene].layers[layer]
        if ss.issparse(mat):
            mat = mat.toarray()
        return np.asarray(mat).reshape(-1)

    gene1_id = sr.at[gene1]
    gene2_id = sr.at[gene2]
    return stats.spearmanr(_flatExpression(gene1_id), _flatExpression(gene2_id))[0]
# Null control: a reproducible permutation (random_state=39, without replacement) of the
# 'Gene 2' column; .values drops the index so the permuted genes are assigned positionally.
df_geneDup['Gene Shuffle'] = df_geneDup['Gene 2'].sample(frac=1, random_state=39).values
# Row-wise Spearman correlation of Gene 1 with its true partner and with the shuffled gene,
# both read from the 'normalize_log' layer of ad_merged.
df_geneDup["SpearmanR"] = df_geneDup.apply(getSpearmanR, axis=1, ad=ad_merged, layer='normalize_log', gene2='Gene 2')
df_geneDup["SpearmanR_Shuffle"] = df_geneDup.apply(getSpearmanR, axis=1, ad=ad_merged, layer='normalize_log', gene2='Gene Shuffle')
# Density histograms of the real correlations, one facet per duplication category.
(
so.Plot(df_geneDup, x='SpearmanR')
.facet(col='Category')
.add(so.Bars(), so.Hist(stat='density', bins=25))
.layout(size=(16,5))
)
# Overall distributions: shuffled pairs first, then real pairs.
sns.displot(df_geneDup['SpearmanR_Shuffle'])
sns.displot(df_geneDup['SpearmanR'])
<seaborn.axisgrid.FacetGrid at 0x2af2b2f1ac10>
import tqdm
ls_paralogGroup = []

# One row per distinct (Gene 1, Gene 2) combination.
df_geneDup = df_geneDup.drop_duplicates(['Gene 1', 'Gene 2'])

# Genes that occur exactly once across both columns, i.e. that take part in a single pair.
_sr_geneOccurrence = (
    pd.concat([df_geneDup['Gene 1'], df_geneDup['Gene 2']])
    .sort_values()
    .value_counts()
)
ls_onePairOrth = _sr_geneOccurrence[_sr_geneOccurrence == 1].index.to_list()

# Keep the pairs in which both members are such single-pair genes; shorten the column names.
_is_1v1 = df_geneDup['Gene 1'].isin(ls_onePairOrth) & df_geneDup['Gene 2'].isin(ls_onePairOrth)
df_paralog1v1Only = df_geneDup.loc[_is_1v1, ['Gene 1', 'Gene 2']].rename(
    columns={'Gene 1': 'G1', 'Gene 2': 'G2'}
)
def getSpearmanR(sr, ad, layer, gene2='Gene 2', gene1='G1'):
    """
    Spearman correlation between the two genes named in one row of a gene-pair table.

    Redefinition for tables whose first-gene column is called ``'G1'``; intended for
    ``DataFrame.apply(getSpearmanR, axis=1, ad=..., layer=..., gene2=...)``.

    Parameters
    ----------
    sr : pd.Series
        One row of the pair table; must contain the labels given by ``gene1`` and ``gene2``.
    ad : AnnData-like
        Object supporting ``ad[:, gene].layers[layer]``.
    layer : str
        Name of the layer holding the expression values.
    gene2 : str
        Label in ``sr`` holding the second gene id.
    gene1 : str
        Label in ``sr`` holding the first gene id.

    Returns
    -------
    float
        The Spearman rank correlation coefficient.
    """

    def _flatExpression(gene):
        # Layers may be dense or scipy-sparse; sparse matrices cannot be reshaped to 1-D.
        mat = ad[:, gene].layers[layer]
        if ss.issparse(mat):
            mat = mat.toarray()
        return np.asarray(mat).reshape(-1)

    gene1_id = sr.at[gene1]
    gene2_id = sr.at[gene2]
    return stats.spearmanr(_flatExpression(gene1_id), _flatExpression(gene2_id))[0]
# Row-wise Spearman correlation between G1 and G2 on the 'normalize_log' layer of ad_merged.
df_paralog1v1Only["SpearmanR"] = df_paralog1v1Only.apply(getSpearmanR, axis=1, ad=ad_merged, layer='normalize_log', gene2='G2')
# Histogram (50 bins) of the correlations of the 1:1 pairs.
sns.displot(df_paralog1v1Only['SpearmanR'], bins=50)
<seaborn.axisgrid.FacetGrid at 0x2badf3b27520>
# Null control for the 1:1 pairs: G2 ids are resampled with a fixed seed and assigned positionally.
# NOTE(review): replace=True draws WITH replacement (some G2 genes repeat, others are dropped),
# unlike the permutation used for df_geneDup - confirm this is intended.
df_paralog1v1Only['Gene Shuffle'] = df_paralog1v1Only['G2'].sample(frac=1, random_state=39, replace=True).values
# Correlation of each G1 with its randomly assigned partner.
df_paralog1v1Only["SpearmanR_shuffle"] = df_paralog1v1Only.apply(getSpearmanR, axis=1, ad=ad_merged, layer='normalize_log', gene2='Gene Shuffle')
import matplotlib as mpl
# Box plot comparing the correlations of real duplicated pairs with those of shuffled pairs.
fig, ax = plt.subplots(figsize=(2, 3))
_df_corrLong = df_paralog1v1Only[['SpearmanR', 'SpearmanR_shuffle']].melt()
ax = sns.boxplot(
    data=_df_corrLong,
    x='variable',
    y='value',
    ax=ax,
    fliersize=0,
    width=0.5,
)

# Restyle each box as a coloured outline on white, and give the six entries of ax.lines at
# positions 6*i .. 6*i+5 (the whiskers and median lines of box i) the same colour.
_ls_boxPatches = [
    artist for artist in ax.get_children() if isinstance(artist, mpl.patches.PathPatch)
]
for i, (box, color) in enumerate(zip(_ls_boxPatches, sns.color_palette())):
    box.set_edgecolor(color)
    box.set_facecolor("white")
    for j in range(6 * i, 6 * (i + 1)):
        ax.lines[j].set_color(color)

sns.despine()
plt.ylabel("Spearman's rank\ncorrelation coefficient", fontsize=13)
plt.xticks([0, 1], ['Duplicated Pairs', 'Shuffled'], rotation=-30, ha='left', fontsize=13)
plt.xlabel('')
plt.show()
import scipy.stats
# Two-sample t-test (scipy defaults) between real and shuffled correlations; only the
# p-value is echoed.
scipy.stats.ttest_ind(df_paralog1v1Only['SpearmanR'], df_paralog1v1Only['SpearmanR_shuffle']).pvalue
6.748250002626319e-150
from joblib import Parallel, delayed
# One call with default settings; the notebook echoes the returned list of gene ids.
# NOTE(review): presumably this call also creates the 'bins_ForPickMock' column in ad.var that
# the seeded calls reuse through usePreBin - verify in jpy_tools.
singleCellTools.geneEnrichInfo.getBgGene(ad, df_paralog1v1Only['G2'].to_list())
['GLYMA_16G180100', 'GLYMA_03G029100', 'GLYMA_15G276900', 'GLYMA_08G286300', 'GLYMA_08G204900', 'GLYMA_01G044000', 'GLYMA_10G046100', 'GLYMA_18G226700', 'GLYMA_02G082500', 'GLYMA_01G184900', 'ENSRNA050001470', 'GLYMA_05G211000', 'GLYMA_09G146000', 'GLYMA_13G198100', 'GLYMA_11G048800', 'GLYMA_14G156800', 'GLYMA_02G239000', 'GLYMA_01G245000', 'GLYMA_13G355700', 'GLYMA_12G063300', 'GLYMA_16G131700', 'GLYMA_15G073800', 'GLYMA_15G125600', 'GLYMA_07G096300', 'GLYMA_05G069300', 'GLYMA_01G152200', 'GLYMA_16G209700', 'GLYMA_06G092600', 'GLYMA_03G193200', 'GLYMA_07G250300', 'GLYMA_13G277300', 'GLYMA_13G152800', 'GLYMA_17G214500', 'GLYMA_03G054100', 'GLYMA_16G158800', 'GLYMA_08G291200', 'GLYMA_08G215900', 'GLYMA_01G123100', 'GLYMA_10G001800', 'GLYMA_19G045900', 'GLYMA_02G121800', 'GLYMA_01G224500', 'GLYMA_04G045200', 'GLYMA_05G155900', 'GLYMA_09G183000', 'GLYMA_13G213500', 'GLYMA_11G065300', 'GLYMA_15G110400', 'GLYMA_02G297100', 'GLYMA_02G013900', 'GLYMA_14G045700', 'GLYMA_12G087900', 'GLYMA_17G086400', 'GLYMA_15G255500', 'GLYMA_16G024700', 'GLYMA_07G210000', 'GLYMA_05G002100', 'GLYMA_01G204100', 'GLYMA_17G261000', 'GLYMA_06G085100', 'GLYMA_03G216800', 'GLYMA_08G007800', 'GLYMA_13G339400', 'GLYMA_13G167700', 'GLYMA_20G213100', 'GLYMA_07G252200', 'GLYMA_19G088600', 'GLYMA_10G287300', 'GLYMA_12G166600', 'GLYMA_06G316300', 'GLYMA_10G134400', 'GLYMA_12G026800', 'GLYMA_18G203700', 'GLYMA_12G174300', 'GLYMA_17G130500', 'GLYMA_03G124500', 'GLYMA_16G117600', 'GLYMA_08G251600', 'GLYMA_08G188600', 'GLYMA_01G158700', 'GLYMA_10G107900', 'GLYMA_19G045300', 'GLYMA_02G214600', 'GLYMA_01G237700', 'GLYMA_04G099300', 'GLYMA_05G184300', 'GLYMA_09G254000', 'GLYMA_13G338300', 'GLYMA_11G196800', 'GLYMA_15G073100', 'GLYMA_03G016600', 'GLYMA_02G068200', 'GLYMA_14G084000', 'GLYMA_12G199900', 'GLYMA_17G038600', 'GLYMA_15G190300', 'GLYMA_15G236900', 'GLYMA_07G111300', 'GLYMA_05G049700', 'GLYMA_01G212600', 'GLYMA_17G192500', 'ENSRNA050002921', 'GLYMA_03G254200', 'GLYMA_07G170800', 'GLYMA_14G012600', 
'GLYMA_13G268200', 'GLYMA_20G196800', 'GLYMA_07G157800', 'GLYMA_19G080800', 'GLYMA_11G106200', 'GLYMA_13G029100', 'GLYMA_11G170800', 'GLYMA_03G008100', 'GLYMA_16G027000', 'GLYMA_08G218000', 'GLYMA_08G116600', 'GLYMA_01G075500', 'GLYMA_08G361400', 'GLYMA_16G163300', 'GLYMA_02G086700', 'GLYMA_01G210000', 'GLYMA_03G253700', 'GLYMA_05G084900', 'GLYMA_10G214300', 'GLYMA_13G045700', 'GLYMA_17G203500', 'GLYMA_18G274300', 'GLYMA_02G222700', 'GLYMA_02G003800', 'GLYMA_10G237200', 'GLYMA_11G195100', 'GLYMA_16G180200', 'GLYMA_15G125000', 'GLYMA_18G121800', 'GLYMA_17G123400', 'GLYMA_03G087400', 'GLYMA_16G169200', 'GLYMA_08G232300', 'GLYMA_08G177700', 'GLYMA_01G137600', 'GLYMA_09G263400', 'GLYMA_19G046800', 'GLYMA_02G179100', 'GLYMA_02G014500', 'GLYMA_04G097200', 'GLYMA_05G177200', 'GLYMA_09G135700', 'GLYMA_13G258200', 'GLYMA_11G061900', 'GLYMA_15G110800', 'GLYMA_02G306600', 'GLYMA_02G086300', 'GLYMA_14G067700', 'GLYMA_12G049500', 'GLYMA_17G024100', 'GLYMA_15G228000', 'GLYMA_15G266800', 'GLYMA_07G121100', 'GLYMA_05G082000', 'GLYMA_01G231200', 'GLYMA_17G152800', 'GLYMA_06G084500', 'GLYMA_03G213900', 'GLYMA_07G233600', 'GLYMA_13G342100', 'GLYMA_13G173000', 'GLYMA_20G192100', 'GLYMA_07G194100', 'GLYMA_19G063100', 'GLYMA_11G005600', 'GLYMA_12G152000', 'GLYMA_06G300500', 'GLYMA_10G104700', 'GLYMA_11G238600', 'GLYMA_18G223500', 'GLYMA_12G178900', 'GLYMA_03G044400', 'GLYMA_08G080900', 'GLYMA_07G113500', 'GLYMA_16G187300', 'GLYMA_17G063400', 'GLYMA_03G008800', 'GLYMA_16G034200', 'GLYMA_08G204500', 'GLYMA_08G101000', 'GLYMA_01G077100', 'GLYMA_09G206300', 'GLYMA_19G002300', 'GLYMA_02G130700', 'GLYMA_02G009300', 'GLYMA_03G213400', 'GLYMA_05G128100', 'GLYMA_09G059800', 'GLYMA_13G087400', 'GLYMA_10G275300', 'GLYMA_14G201100', 'GLYMA_02G247100', 'GLYMA_02G054100', 'GLYMA_13G258300', 'GLYMA_11G236000', 'GLYMA_16G186600', 'GLYMA_15G100000', 'GLYMA_15G120600', 'GLYMA_07G074600', 'GLYMA_04G257100', 'GLYMA_01G240900', 'GLYMA_17G087700', 'GLYMA_06G055300', 'GLYMA_03G143800', 'GLYMA_07G182500', 
'GLYMA_13G170800', 'GLYMA_13G031800', 'GLYMA_20G164200', 'GLYMA_07G150500', 'GLYMA_19G013000', 'GLYMA_10G233600', 'GLYMA_17G113100', 'GLYMA_03G042900', 'GLYMA_16G107800', 'GLYMA_08G163500', 'GLYMA_08G077900', 'GLYMA_01G087600', 'GLYMA_09G144700', 'GLYMA_19G009200', 'GLYMA_02G119600', 'GLYMA_01G213000', 'GLYMA_04G029200', 'GLYMA_05G042300', 'GLYMA_09G011600', 'GLYMA_13G149400', 'GLYMA_10G278800', 'GLYMA_15G010700', 'GLYMA_02G253100', 'GLYMA_02G028500', 'GLYMA_13G324900', 'GLYMA_12G006300', 'ENSRNA050000772', 'GLYMA_15G195800', 'GLYMA_15G236800', 'GLYMA_07G015900', 'GLYMA_04G229700', 'GLYMA_01G191700', 'GLYMA_17G134600', 'GLYMA_05G220900', 'GLYMA_03G176600', 'GLYMA_07G131500', 'GLYMA_13G256400', 'GLYMA_13G096200', 'GLYMA_20G162200', 'GLYMA_07G060600', 'GLYMA_19G054500', 'GLYMA_10G237900', 'GLYMA_12G161600', 'GLYMA_06G155100', 'GLYMA_10G025800', 'GLYMA_11G192600', 'GLYMA_18G183100', 'GLYMA_12G165200', 'GLYMA_02G305500', 'GLYMA_07G254000', 'ENSRNA050030299', 'GLYMA_11G096300', 'GLYMA_02G311400', 'ENSRNA050001983', 'GLYMA_08G099600', 'GLYMA_08G046300', 'GLYMA_01G110400', 'GLYMA_08G247900', 'GLYMA_16G104500', 'GLYMA_02G160800', 'GLYMA_01G218200', 'GLYMA_03G230000', 'GLYMA_05G008400', 'GLYMA_10G020100', 'GLYMA_13G090600', 'GLYMA_17G163100', 'GLYMA_18G278100', 'GLYMA_02G277000', 'GLYMA_02G031800', 'GLYMA_10G099100', 'GLYMA_11G105300', 'GLYMA_16G119500', 'GLYMA_15G104600', 'GLYMA_18G042400', 'GLYMA_07G028900', 'GLYMA_04G187800', 'GLYMA_01G192900', 'GLYMA_10G215100', 'GLYMA_05G195900', 'GLYMA_03G153200', 'GLYMA_07G108400', 'GLYMA_13G167200', 'GLYMA_15G029400', 'GLYMA_20G048500', 'GLYMA_07G096700', 'GLYMA_08G366800', 'GLYMA_10G016000', 'GLYMA_10G177300', 'GLYMA_06G201200', 'GLYMA_19G101300', 'GLYMA_19G176000', 'GLYMA_17G048600', 'GLYMA_02G271400', 'GLYMA_16G059100', 'GLYMA_08G266900', 'GLYMA_08G163200', 'GLYMA_01G098500', 'GLYMA_09G274400', 'GLYMA_18G244600', 'GLYMA_02G060300', 'GLYMA_01G201800', 'GLYMA_04G066300', 'GLYMA_05G142400', 'GLYMA_09G120500', 'GLYMA_13G092000', 
'GLYMA_10G276800', 'GLYMA_14G117900', 'GLYMA_02G199100', 'GLYMA_01G239800', 'GLYMA_13G321900', 'GLYMA_12G020400', 'GLYMA_16G196700', 'GLYMA_15G112500', 'GLYMA_15G143500', 'GLYMA_07G127400', 'GLYMA_05G007700', 'GLYMA_01G174400', 'GLYMA_17G060600', 'GLYMA_06G088800', 'GLYMA_03G162400', 'GLYMA_07G234700', 'GLYMA_13G257500', 'GLYMA_13G029600', 'GLYMA_20G131100', 'GLYMA_07G198500', 'GLYMA_18G260000', 'GLYMA_10G239700', 'GLYMA_12G136600', 'GLYMA_07G019300', 'GLYMA_10G107000', 'GLYMA_11G197600', 'GLYMA_18G031200', 'GLYMA_12G142100', 'GLYMA_02G243500', 'GLYMA_17G100300', 'GLYMA_03G114300', 'GLYMA_16G151200', 'GLYMA_08G351400', 'GLYMA_08G254300', 'GLYMA_01G101000', 'GLYMA_10G045700', 'GLYMA_19G028000', 'GLYMA_02G184700', 'GLYMA_01G240400', 'GLYMA_04G049000', 'GLYMA_05G134200', 'GLYMA_09G230300', 'GLYMA_13G112400', 'GLYMA_11G036100', 'GLYMA_15G052400', 'GLYMA_03G001300', 'GLYMA_02G058200', 'GLYMA_13G344700', 'GLYMA_12G033800', 'GLYMA_17G038200', 'GLYMA_15G221800', 'GLYMA_15G262900', 'GLYMA_07G226000', 'GLYMA_04G247100', 'GLYMA_01G201200', 'GLYMA_17G125200', 'GLYMA_06G053500', 'GLYMA_03G206800', 'GLYMA_08G053000', 'GLYMA_13G221500', 'GLYMA_13G038300', 'GLYMA_20G180500', 'GLYMA_08G019000', 'GLYMA_19G080700', 'GLYMA_11G007400', 'GLYMA_12G106800', 'GLYMA_12G052200', 'GLYMA_03G007900', 'GLYMA_16G143600', 'GLYMA_08G257700', 'GLYMA_08G151700', 'GLYMA_01G146300', 'GLYMA_09G044100', 'GLYMA_17G051100', 'GLYMA_02G151500', 'GLYMA_02G014700', 'GLYMA_04G037800', 'GLYMA_05G107300', 'GLYMA_11G026900', 'GLYMA_13G154700', 'GLYMA_18G044900', 'GLYMA_19G086700', 'GLYMA_02G264800', 'GLYMA_02G067100', 'GLYMA_11G097600', 'GLYMA_12G055200', 'GLYMA_17G056800', 'GLYMA_15G226200', 'GLYMA_18G190100', 'GLYMA_07G073200', 'GLYMA_05G009100', 'GLYMA_01G238000', 'GLYMA_11G188800', 'GLYMA_05G238400', 'GLYMA_03G163800', 'GLYMA_07G221900', 'GLYMA_13G272300', 'GLYMA_15G158200', 'GLYMA_20G083000', 'GLYMA_07G181900', 'GLYMA_09G276300', 'GLYMA_11G025800', 'GLYMA_11G157200', 'GLYMA_06G202200', 'GLYMA_19G175800', 
'GLYMA_19G232600', 'GLYMA_16G126900', 'GLYMA_13G052800', 'GLYMA_17G102100', 'GLYMA_03G034800', 'GLYMA_16G026100', 'GLYMA_08G310500', 'GLYMA_08G222800', 'GLYMA_01G104600', 'GLYMA_10G065200', 'GLYMA_19G029800', 'GLYMA_02G134200', 'GLYMA_01G214900', 'GLYMA_03G229900', 'GLYMA_04G245900', 'GLYMA_09G248200', 'GLYMA_13G200100', 'GLYMA_11G065800', 'GLYMA_14G176100', 'GLYMA_02G286200', 'GLYMA_02G018600', 'GLYMA_13G370800', 'GLYMA_12G073700', 'GLYMA_17G018300', 'GLYMA_15G098400', 'GLYMA_15G109600', 'GLYMA_07G068500', 'GLYMA_04G136600', 'GLYMA_01G186800', 'GLYMA_17G120700', 'GLYMA_05G151000', 'GLYMA_03G126000', 'GLYMA_07G192200', 'GLYMA_13G320600', 'GLYMA_13G137000', 'GLYMA_20G191000', 'GLYMA_07G163100', 'GLYMA_19G042800', 'GLYMA_11G022400', 'GLYMA_12G184200', 'GLYMA_06G166100', 'GLYMA_10G187900', 'GLYMA_12G033300', 'GLYMA_18G138600', 'GLYMA_12G194200', 'GLYMA_02G309100', 'GLYMA_08G098500', 'GLYMA_07G032700', 'GLYMA_16G133600', 'GLYMA_01G067300', 'GLYMA_19G155300', 'GLYMA_15G169600', 'GLYMA_11G166700', 'GLYMA_07G046400', 'GLYMA_19G203500', 'GLYMA_18G062500', 'GLYMA_06G014200', 'GLYMA_04G081400', 'GLYMA_15G114500', 'GLYMA_07G196100', 'GLYMA_20G005500', 'GLYMA_18G289700', 'GLYMA_05G161300', 'GLYMA_17G084400', 'GLYMA_02G280600', 'GLYMA_16G123600', 'GLYMA_09G000400', 'GLYMA_08G250800', 'GLYMA_01G111000', 'GLYMA_10G133900', 'GLYMA_19G002700', 'GLYMA_02G072800', 'GLYMA_01G200400', 'GLYMA_03G251600', 'GLYMA_05G159000', 'GLYMA_09G265200', 'GLYMA_13G287400', 'GLYMA_11G175800', 'GLYMA_15G082600', 'GLYMA_02G200400', 'GLYMA_01G239400', 'GLYMA_14G105800', 'GLYMA_12G140300', 'GLYMA_17G015500', 'GLYMA_15G265200', 'GLYMA_16G004900', 'GLYMA_07G203300', 'GLYMA_05G028900', 'GLYMA_01G170000', 'GLYMA_17G098700', 'GLYMA_06G066800', 'GLYMA_03G097300', 'GLYMA_08G045400', 'GLYMA_14G023600', 'GLYMA_13G221000', 'GLYMA_20G191100', 'GLYMA_08G020600', 'GLYMA_19G011400', 'GLYMA_11G119700', 'GLYMA_12G233000', 'GLYMA_07G046900', 'GLYMA_10G243800', 'GLYMA_12G085800', 'GLYMA_18G113400', 'ENSRNA050003713', 
'GLYMA_02G256000', 'GLYMA_08G162400', 'GLYMA_07G196400', 'GLYMA_16G194800', 'GLYMA_01G077900', 'GLYMA_19G095500', 'GLYMA_16G037200', 'GLYMA_12G025400', 'GLYMA_07G202500', 'GLYMA_19G197900', 'GLYMA_18G041600', 'GLYMA_06G189100', 'GLYMA_05G008300', 'GLYMA_17G104000', 'GLYMA_02G302800', 'GLYMA_16G094300', 'GLYMA_08G185200', 'GLYMA_08G134900', 'GLYMA_01G147400', 'GLYMA_10G013600', 'GLYMA_19G019600', 'GLYMA_02G201600', 'GLYMA_02G045600', 'GLYMA_04G060600', 'GLYMA_05G138500', 'GLYMA_09G168800', 'GLYMA_13G222700', 'GLYMA_10G283800', 'GLYMA_15G055600', 'GLYMA_02G272900', 'GLYMA_02G105900', 'GLYMA_13G348400', 'GLYMA_12G047200', 'GLYMA_17G019000', 'GLYMA_15G121700', 'GLYMA_15G143900', 'GLYMA_07G080300', 'GLYMA_05G033400', 'GLYMA_02G020500', 'GLYMA_17G115400', 'GLYMA_06G042200', 'GLYMA_03G153600', 'GLYMA_07G140200', 'GLYMA_13G256000', 'GLYMA_13G178400', 'GLYMA_20G199600', 'GLYMA_07G126000', 'GLYMA_19G061500', 'GLYMA_10G258600', 'GLYMA_12G194900', 'GLYMA_06G224900', 'GLYMA_10G134500', 'GLYMA_11G217900', 'GLYMA_18G140700', 'GLYMA_12G198600', 'GLYMA_02G282500', 'GLYMA_08G008600', 'GLYMA_07G057300', 'GLYMA_16G133800', 'GLYMA_01G078000', 'GLYMA_19G153800', 'GLYMA_15G242900', 'GLYMA_11G069900', 'GLYMA_07G077500', 'GLYMA_19G246800', 'GLYMA_18G074600', 'GLYMA_06G136000', 'GLYMA_12G193600', 'GLYMA_03G200600', 'GLYMA_17G033900', 'GLYMA_09G050600', 'GLYMA_08G322100', 'GLYMA_01G184500', 'GLYMA_09G242900', 'GLYMA_17G108800', 'GLYMA_02G240300', 'GLYMA_02G027200', 'GLYMA_04G215800', 'GLYMA_06G084600', 'GLYMA_11G113300', 'GLYMA_13G236300', 'GLYMA_18G041500', 'GLYMA_19G021300', 'GLYMA_03G116700', 'GLYMA_02G127300', 'GLYMA_11G219600', 'GLYMA_12G212700', 'GLYMA_17G117000', 'GLYMA_16G021700', 'GLYMA_18G185900', 'GLYMA_07G270500', 'GLYMA_05G188700', 'GLYMA_02G017000', 'GLYMA_12G062000', 'GLYMA_06G261000', 'GLYMA_04G024000', 'GLYMA_08G060600', 'GLYMA_13G362600', 'GLYMA_15G242200', 'GLYMA_20G022500', 'GLYMA_08G046500', 'GLYMA_10G113700', 'GLYMA_11G111800', 'GLYMA_12G033100', 'GLYMA_07G140500', 
'GLYMA_19G124300', 'GLYMA_19G203900', 'GLYMA_17G002800', 'GLYMA_13G155300', 'GLYMA_03G131200', 'GLYMA_08G188100', 'GLYMA_07G265300', 'GLYMA_10G197800', 'GLYMA_01G105400', 'GLYMA_17G042900', 'GLYMA_20G206200', 'GLYMA_17G147400', 'GLYMA_03G034000', 'GLYMA_16G169900', 'GLYMA_08G269000', 'GLYMA_08G157200', 'GLYMA_01G062200', 'GLYMA_09G268500', 'GLYMA_19G053100', 'GLYMA_02G092000', 'GLYMA_01G226000', 'GLYMA_03G235400', 'GLYMA_05G114100', 'GLYMA_09G198800', 'GLYMA_13G271500', 'GLYMA_11G054000', 'GLYMA_15G084200', 'GLYMA_02G297900', 'GLYMA_02G037000', 'GLYMA_14G074100', 'GLYMA_12G078400', 'GLYMA_17G078500', 'GLYMA_15G271200', 'GLYMA_16G019400', 'GLYMA_07G113300', 'GLYMA_05G002000', 'GLYMA_01G205700', 'GLYMA_17G164300', 'GLYMA_06G046400', 'GLYMA_03G153900', 'GLYMA_07G184600', 'GLYMA_14G015800', 'GLYMA_13G202100', 'GLYMA_20G199800', 'GLYMA_07G168600', 'GLYMA_19G116200', 'GLYMA_10G281700', 'GLYMA_12G172700', 'GLYMA_06G305000', 'GLYMA_10G119100', 'GLYMA_12G008400', 'GLYMA_18G184100', 'GLYMA_12G186000', 'GLYMA_03G004600', 'GLYMA_08G039900', 'GLYMA_07G093000', 'GLYMA_16G212300', 'GLYMA_01G033000', 'GLYMA_19G166700', 'GLYMA_16G069100', 'GLYMA_11G138600', 'GLYMA_07G107500', 'GLYMA_20G003900', 'GLYMA_18G048800', 'GLYMA_06G169400', 'GLYMA_04G225500', 'GLYMA_16G022200', 'GLYMA_07G188600', 'GLYMA_17G096400', 'GLYMA_03G061200', 'GLYMA_16G073100', 'GLYMA_09G013400', 'GLYMA_08G323700', 'GLYMA_01G154200', 'GLYMA_10G119400', 'GLYMA_19G012900', 'GLYMA_02G145600', 'GLYMA_01G244200', 'GLYMA_04G038800', 'GLYMA_06G010800', 'GLYMA_09G249300', 'GLYMA_13G269900', 'GLYMA_11G121200', 'GLYMA_15G020800', 'GLYMA_02G309700', 'GLYMA_02G064100', 'GLYMA_14G046400', 'GLYMA_12G104100', 'GLYMA_17G011800', 'GLYMA_15G106800', 'GLYMA_15G131500', 'GLYMA_07G254400', 'GLYMA_05G124400', 'GLYMA_01G223500', 'GLYMA_17G137100', 'GLYMA_06G172100', 'GLYMA_03G196800', 'GLYMA_08G073700', 'GLYMA_13G345000', 'GLYMA_13G236600', 'GLYMA_20G128900', 'GLYMA_08G058100', 'GLYMA_19G023200', 'GLYMA_11G066100', 'GLYMA_13G030200', 
'GLYMA_07G075300', 'GLYMA_10G237300', 'GLYMA_12G036900', 'GLYMA_18G102400', 'GLYMA_13G035100', 'GLYMA_03G032500', 'GLYMA_08G204700', 'GLYMA_07G245000', 'GLYMA_16G117400', 'GLYMA_01G091800', 'GLYMA_19G133700', 'GLYMA_15G273700', 'GLYMA_11G189100', 'GLYMA_07G251800', 'GLYMA_19G201400', 'GLYMA_18G022800', 'GLYMA_06G298500', 'GLYMA_05G046700', 'GLYMA_15G146200', 'GLYMA_08G077700', 'GLYMA_12G085900', 'GLYMA_02G312300', 'GLYMA_16G014700', 'GLYMA_08G350100', 'GLYMA_08G230400', 'GLYMA_01G152900', 'GLYMA_09G135200', 'GLYMA_16G160200', 'GLYMA_02G150100', 'GLYMA_01G236500', 'GLYMA_04G003600', 'GLYMA_05G154600', 'GLYMA_11G075300', 'GLYMA_13G197700', 'GLYMA_17G190400', 'GLYMA_19G011100', 'GLYMA_02G272000', 'GLYMA_02G024100', 'GLYMA_11G134200', 'GLYMA_12G088300', 'GLYMA_16G175200', 'GLYMA_15G084800', 'GLYMA_18G116700', 'GLYMA_07G202900', 'GLYMA_05G054800', 'GLYMA_01G203700', 'GLYMA_11G216700', 'GLYMA_06G071400', 'GLYMA_03G165600', 'GLYMA_08G040400', 'GLYMA_13G249400', 'GLYMA_15G042900', 'GLYMA_19G245300', 'GLYMA_08G028800', 'GLYMA_10G009500', 'GLYMA_11G075200', 'GLYMA_11G167200', 'GLYMA_07G065400', 'GLYMA_19G128400', 'GLYMA_19G172400', 'GLYMA_15G264800', 'GLYMA_13G070100', 'GLYMA_02G286300', 'GLYMA_08G141300', 'GLYMA_07G164900', 'GLYMA_10G134100', 'GLYMA_01G097600', 'GLYMA_16G024800', 'GLYMA_20G198500', 'GLYMA_11G172700', 'GLYMA_07G178100', 'GLYMA_19G197600', 'GLYMA_18G203300', 'GLYMA_06G214700', 'GLYMA_05G005000', 'GLYMA_15G137900', 'GLYMA_08G041600', 'GLYMA_15G005400', 'GLYMA_18G193600', 'GLYMA_06G091500', 'GLYMA_18G249200', 'GLYMA_17G063800', 'GLYMA_02G268200', 'GLYMA_15G243000', 'GLYMA_08G204400', 'GLYMA_08G155900', 'GLYMA_01G087000', 'GLYMA_09G163700', 'GLYMA_19G005100', 'GLYMA_02G079200', 'GLYMA_01G162300', 'GLYMA_04G004900', 'GLYMA_05G161400', 'GLYMA_09G032200', 'GLYMA_13G159800', 'GLYMA_11G028300', 'GLYMA_14G209100', 'GLYMA_02G222000', 'GLYMA_01G228400', 'GLYMA_13G358800', 'GLYMA_11G250400', 'GLYMA_16G149900', 'GLYMA_15G042300', 'GLYMA_15G064800', 'GLYMA_07G108100', 
'GLYMA_04G249500', 'GLYMA_01G147000', 'GLYMA_17G107900', 'GLYMA_06G032300', 'GLYMA_03G125400', 'GLYMA_07G240700', 'GLYMA_13G276300', 'GLYMA_13G119100', 'GLYMA_20G175900', 'GLYMA_07G211900', 'GLYMA_19G013800', 'GLYMA_10G273200', 'GLYMA_12G095000', 'GLYMA_06G287200', 'GLYMA_10G057300', 'GLYMA_11G187300', 'GLYMA_18G087500', 'GLYMA_12G107000', 'GLYMA_02G230700', 'GLYMA_08G071200', 'GLYMA_07G086400', 'GLYMA_16G022400', 'GLYMA_01G048600', 'GLYMA_19G095000', 'GLYMA_15G142700', 'GLYMA_11G098900', 'GLYMA_07G091400', 'GLYMA_19G198300', 'GLYMA_18G007600', 'GLYMA_06G141000', 'GLYMA_04G202100', 'GLYMA_15G069600', 'GLYMA_07G251000', 'GLYMA_19G243600', 'GLYMA_18G259700', 'GLYMA_06G067500', 'GLYMA_14G166900', 'GLYMA_01G010000', 'GLYMA_08G238600', 'GLYMA_12G161500', 'GLYMA_03G201300', 'GLYMA_18G038800', 'GLYMA_06G087200', 'GLYMA_08G023400', 'GLYMA_11G146700', 'GLYMA_11G048900', 'GLYMA_08G241400', 'GLYMA_09G218700', 'GLYMA_11G021600', 'GLYMA_17G074300', 'GLYMA_03G028300', 'GLYMA_16G052400', 'GLYMA_08G205900', 'GLYMA_08G145100', 'GLYMA_01G108400', 'GLYMA_09G244900', 'GLYMA_18G285300', 'GLYMA_02G144100', 'GLYMA_01G214500', 'GLYMA_04G078600', 'GLYMA_05G108600', 'GLYMA_09G076200', 'GLYMA_13G117600', 'GLYMA_11G047200', 'GLYMA_14G090900', 'GLYMA_02G276400', 'GLYMA_02G034100', 'GLYMA_13G292500', 'GLYMA_11G200500', 'GLYMA_16G195800', 'GLYMA_15G027400', 'GLYMA_15G072800', 'GLYMA_07G096100', 'GLYMA_05G042800', 'GLYMA_01G208700', 'GLYMA_17G102000', 'GLYMA_06G002600', 'GLYMA_03G203800', 'GLYMA_07G166800', 'GLYMA_13G190800', 'GLYMA_13G074700', 'GLYMA_20G195400', 'GLYMA_07G135800', 'GLYMA_18G296900', 'GLYMA_10G290300', 'GLYMA_12G031300', 'GLYMA_06G229200', 'GLYMA_10G059800', 'GLYMA_11G134700', 'GLYMA_18G067600', 'GLYMA_12G039400', 'GLYMA_03G000800', 'GLYMA_08G049200', 'GLYMA_07G055400', 'GLYMA_16G118600', 'GLYMA_01G085500', 'GLYMA_19G081500', 'GLYMA_15G200600', 'GLYMA_11G089600', 'GLYMA_07G089200', 'GLYMA_19G181200', 'GLYMA_18G026200', 'GLYMA_06G091600', 'GLYMA_05G007400', 'GLYMA_15G074900', 
'GLYMA_07G201000', 'GLYMA_19G242800', 'GLYMA_18G271400', 'GLYMA_06G016600', 'GLYMA_14G061700', 'GLYMA_17G117500', 'GLYMA_03G123500', 'GLYMA_16G104700', 'GLYMA_08G209900', 'GLYMA_08G156200', 'GLYMA_01G122800', 'GLYMA_09G275300', 'GLYMA_18G227400', 'GLYMA_02G177100', 'GLYMA_02G009600', 'GLYMA_04G054600', 'GLYMA_05G179000', 'GLYMA_09G118500', 'GLYMA_13G214700', 'GLYMA_11G013700', 'GLYMA_15G004000', 'GLYMA_02G303400', 'GLYMA_02G054700', 'GLYMA_13G328600', 'GLYMA_12G011400', 'GLYMA_17G049500', 'GLYMA_15G114600', 'GLYMA_15G167700', 'GLYMA_07G103200', 'GLYMA_05G053600', 'GLYMA_01G227400', 'GLYMA_17G141600', 'GLYMA_06G081400', 'GLYMA_03G197100', 'GLYMA_07G215500', 'GLYMA_13G289100', 'GLYMA_13G153300', 'GLYMA_20G153300', 'GLYMA_07G194000', 'GLYMA_18G244300', 'GLYMA_10G265100', 'GLYMA_12G102600', 'GLYMA_07G022000', 'GLYMA_10G089200', 'GLYMA_11G203300', 'GLYMA_18G074400', 'GLYMA_12G120400', 'GLYMA_03G046500', 'GLYMA_08G055700', 'GLYMA_07G088200', 'GLYMA_16G136100', 'GLYMA_01G053100', 'GLYMA_19G045700', 'GLYMA_16G000100', 'GLYMA_11G111000', ...]
# Draw 1000 seeded background-gene sets for the G2 genes (one "seed_<i>_bg" column per seed).
# The columns are collected first and attached in a single concat: inserting 1000 columns one
# at a time fragments the DataFrame and triggers pandas' PerformanceWarning.
_dt_bgGene = {}
for i in tqdm.tqdm(range(1000)):
    _dt_bgGene[f"seed_{i}_bg"] = singleCellTools.geneEnrichInfo.getBgGene(
        ad, df_paralog1v1Only['G2'].to_list(), seed=i, usePreBin='bins_ForPickMock'
    )
df_paralog1v1Only = pd.concat(
    [
        # Dropping existing seed columns keeps a re-run overwriting them instead of duplicating.
        df_paralog1v1Only.drop(columns=list(_dt_bgGene), errors='ignore'),
        pd.DataFrame(_dt_bgGene, index=df_paralog1v1Only.index),
    ],
    axis=1,
)
100%|██████████| 1000/1000 [02:00<00:00, 8.33it/s]
def getBootstrapPearson(df: pd.DataFrame, ad, n_seed: int = 1000, layer: str = 'normalize_log'):
    """
    Correlate every G1 gene with each of its seeded background genes.

    Despite the name, the statistic is the Spearman rank correlation
    (``scipy.stats.spearmanr``).

    Parameters
    ----------
    df : pd.DataFrame
        Must contain a ``G1`` column and the columns ``seed_0_bg`` .. ``seed_{n_seed-1}_bg``
        holding gene ids.
    ad : AnnData-like
        Object supporting ``ad[:, gene].layers[layer]``.
    n_seed : int
        Number of background columns to evaluate.
    layer : str
        Layer the expression values are read from.

    Returns
    -------
    pd.DataFrame
        ``df`` with the columns ``seed_0_corr`` .. ``seed_{n_seed-1}_corr`` appended.
    """
    ls_result = []
    for nt in df.itertuples():
        # The G1 vector is identical for every seed, so extract it once per row.
        g1_expr = ad[:, nt.G1].layers[layer].reshape(-1)
        ls_result.append(
            [
                stats.spearmanr(
                    g1_expr,
                    ad[:, getattr(nt, f"seed_{i}_bg")].layers[layer].reshape(-1),
                )[0]
                for i in range(n_seed)
            ]
        )
    df_result = pd.DataFrame(
        ls_result,
        columns=[f"seed_{i}_corr" for i in range(n_seed)],
        index=df.index,
    )
    return pd.concat([df, df_result], axis=1)
# Spread the rows over 64 random groups (labels 0..63) so that each joblib worker gets one chunk.
# np.random.randint replaces the deprecated np.random.random_integers; its upper bound is
# exclusive, hence 64.
df_paralog1v1Only['joblib_group'] = np.random.randint(0, 64, size=len(df_paralog1v1Only))
_lsDf = Parallel(64)(
    delayed(getBootstrapPearson)(_df_group, ad_merged)
    for _, _df_group in df_paralog1v1Only.groupby('joblib_group')
)
df_paralogBootstrapResult = pd.concat(_lsDf)
df_paralogBootstrapResult = df_paralogBootstrapResult.reset_index(drop=True)

# Rank of the observed correlation among itself and the background correlations
# (1 = largest; ties broken by column order), computed once and reused below.
df_paralogBootstrapResult['G2_rank'] = (
    df_paralogBootstrapResult[['SpearmanR']]
    .join(df_paralogBootstrapResult.filter(regex=r"seed_\d+_corr"))
    .rank(1, method='first', ascending=False)['SpearmanR']
)

sns.displot(df_paralogBootstrapResult['G2_rank'] / 1000, bins=100)
plt.xlabel('P value')
plt.axvline(x=0.05, ls='--', color='black')
plt.show()

# Empirical p-value: rank divided by 1000; the filtered table is echoed by the notebook.
df_paralogBootstrapResult['p'] = df_paralogBootstrapResult['G2_rank'] / 1000
df_paralogBootstrapResult.query("p < 0.05")
| G1 | G2 | SpearmanR | seed_0_bg | seed_1_bg | seed_2_bg | seed_3_bg | seed_4_bg | seed_5_bg | seed_6_bg | ... | seed_992_corr | seed_993_corr | seed_994_corr | seed_995_corr | seed_996_corr | seed_997_corr | seed_998_corr | seed_999_corr | G2_rank | p | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 3 | GLYMA_02G186700 | GLYMA_10G106900 | 0.896429 | GLYMA_11G192600 | GLYMA_03G250200 | GLYMA_09G261700 | GLYMA_09G263200 | GLYMA_15G106400 | GLYMA_13G053700 | GLYMA_08G359500 | ... | -0.150585 | 0.472805 | -0.247133 | -0.435798 | -0.016116 | -0.760452 | 0.636220 | 0.229129 | 1.0 | 0.001 |
| 5 | GLYMA_02G284400 | GLYMA_14G030600 | 0.732143 | GLYMA_04G066300 | GLYMA_19G039100 | GLYMA_19G076800 | GLYMA_13G286900 | GLYMA_19G156000 | GLYMA_20G222800 | GLYMA_06G032100 | ... | 0.064517 | -0.527949 | 0.225492 | 0.005406 | 0.028204 | 0.195760 | 0.141019 | -0.265498 | 1.0 | 0.001 |
| 13 | GLYMA_05G148900 | GLYMA_08G105600 | 0.764969 | GLYMA_13G358800 | GLYMA_06G225100 | GLYMA_18G205400 | GLYMA_04G156500 | GLYMA_09G047300 | GLYMA_09G151800 | GLYMA_02G106500 | ... | 0.007149 | 0.417857 | -0.003571 | 0.396429 | 0.260714 | 0.167857 | -0.175000 | 0.396429 | 5.0 | 0.005 |
| 16 | GLYMA_07G180000 | GLYMA_20G009200 | 0.750000 | GLYMA_02G018200 | GLYMA_13G355300 | GLYMA_10G055400 | GLYMA_08G216200 | GLYMA_02G288300 | GLYMA_12G147600 | GLYMA_05G202500 | ... | 0.260714 | -0.514286 | 0.203571 | 0.525000 | 0.328571 | 0.057143 | 0.246429 | 0.632143 | 23.0 | 0.023 |
| 25 | GLYMA_14G081100 | GLYMA_17G244300 | 0.642857 | GLYMA_07G196000 | GLYMA_03G128600 | GLYMA_06G021600 | GLYMA_03G236900 | GLYMA_04G202900 | GLYMA_07G168700 | GLYMA_03G180500 | ... | -0.221429 | -0.214286 | -0.496429 | 0.364286 | 0.346429 | -0.157143 | 0.450000 | -0.407143 | 19.0 | 0.019 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2534 | GLYMA_01G064400 | GLYMA_02G122200 | 0.796429 | GLYMA_13G213500 | GLYMA_06G175500 | GLYMA_20G130000 | GLYMA_04G046900 | GLYMA_03G239200 | GLYMA_11G180900 | GLYMA_01G094300 | ... | 0.169589 | 0.022917 | 0.495841 | -0.410343 | -0.123321 | -0.178602 | -0.537933 | -0.412555 | 1.0 | 0.001 |
| 2541 | GLYMA_04G252900 | GLYMA_06G109600 | 0.867857 | GLYMA_02G150100 | GLYMA_07G145700 | GLYMA_18G045400 | GLYMA_01G203700 | GLYMA_17G261300 | GLYMA_15G232400 | GLYMA_07G262100 | ... | 0.425000 | 0.414286 | 0.525000 | 0.659518 | 0.682143 | -0.042857 | -0.527949 | 0.659518 | 2.0 | 0.002 |
| 2552 | GLYMA_09G264700 | GLYMA_18G227200 | 0.714286 | GLYMA_04G222800 | GLYMA_08G155200 | GLYMA_18G230800 | GLYMA_07G011900 | GLYMA_01G207400 | GLYMA_10G143600 | GLYMA_09G015800 | ... | -0.307143 | -0.135714 | -0.246429 | -0.185714 | -0.089366 | -0.578571 | 0.053571 | 0.150000 | 3.0 | 0.003 |
| 2556 | GLYMA_13G345800 | GLYMA_15G028500 | 0.771429 | GLYMA_19G114000 | GLYMA_01G079200 | GLYMA_06G082100 | GLYMA_15G090300 | GLYMA_04G052100 | GLYMA_02G090800 | GLYMA_13G326200 | ... | -0.017857 | 0.564286 | 0.142857 | -0.092857 | 0.517857 | -0.296429 | 0.750000 | 0.575000 | 17.0 | 0.017 |
| 2557 | GLYMA_02G244300 | GLYMA_14G214800 | 0.753571 | GLYMA_03G025500 | GLYMA_06G190900 | GLYMA_05G098800 | GLYMA_06G098800 | GLYMA_09G205700 | GLYMA_09G008600 | GLYMA_04G232900 | ... | 0.682143 | -0.310714 | 0.896429 | -0.517857 | 0.146429 | 0.564286 | -0.671429 | -0.660714 | 32.0 | 0.032 |
629 rows × 2006 columns
# Histogram (50 bins) of the observed Spearman correlations of the pairs with p < 0.05,
# drawn with the shared theme plus larger axis and tick label sizes.
(
df_paralogBootstrapResult.query("p < 0.05").pipe(so.Plot, x='SpearmanR')
.add(so.Bars(color='#116FAF'), so.Hist(bins=50))
.theme({**dt_snsStyle, 'axes.labelsize': 16, 'xtick.labelsize': 12, 'ytick.labelsize': 12})
.label(x = "Spearman's rank correlation coefficient", y = "Count")
.layout(size=(5,5))
)
# Keep the full bootstrap table (not only the significant rows) on the AnnData object.
ad.uns['gene_dup_corr'] = df_paralogBootstrapResult
# Long table with one row per (1:1 pair, cluster): expression of G1, G2 and the shuffled gene.
# The dense expression frame is built once instead of three times per cluster.
_df_clusterExpr = ad_merged.to_df('normalize_log')
_df_pairGenes = df_paralog1v1Only[['G1', 'G2', 'Gene Shuffle']]
_lsDf = []
for cluster in ad_merged.obs['Cluster'].unique():
    # Explicit copy: columns are added below, and writing to a slice of df_paralog1v1Only
    # raises SettingWithCopyWarning.
    _df = _df_pairGenes.copy()
    _df['G1_expression'] = _df_clusterExpr.loc[cluster, _df['G1']].values
    _df['G2_expression'] = _df_clusterExpr.loc[cluster, _df['G2']].values
    _df['Shuffle_expression'] = _df_clusterExpr.loc[cluster, _df['Gene Shuffle']].values
    _df['Cluster'] = cluster
    _lsDf.append(_df)
df_1v1OnlyDiff = pd.concat(_lsDf)

# 'Diff Fc' / 'Shuffle Fc': exp of the absolute difference of the log-layer values.
# 'Diff' / 'Shuffle_Diff': absolute difference after exponentiating each value.
df_1v1OnlyDiff = (
    df_1v1OnlyDiff
    .eval("Diff = G1_expression - G2_expression \n Shuffle_Diff = G1_expression - Shuffle_expression", engine='python')
    .eval("Diff = Diff.abs() \n Shuffle_Diff = Shuffle_Diff.abs()", engine='python')
    .assign(Diff=lambda df: np.exp(df['Diff']), Shuffle_Diff=lambda df: np.exp(df['Shuffle_Diff']))
    .rename(columns={'Diff': 'Diff Fc', 'Shuffle_Diff': 'Shuffle Fc'})
    .assign(
        Diff=lambda df: np.abs(np.exp(df['G1_expression']) - np.exp(df['G2_expression'])),
        Shuffle_Diff=lambda df: np.abs(np.exp(df['G1_expression']) - np.exp(df['Shuffle_expression'])),
    )
)
from matplotlib.patches import PathPatch
def adjust_box_widths(g, fac):
    """
    Adjust the widths of a seaborn-generated boxplot.

    Every ``PathPatch`` child of every axes in ``g.axes`` is rescaled
    horizontally around its own centre, and any line in ``ax.lines`` whose
    x-data is exactly ``[xmin, xmax]`` of that box is moved to the new edges.
    Artists are modified in place; nothing is returned.

    Parameters
    ----------
    g : object with an ``axes`` iterable (e.g. a matplotlib Figure)
    fac : float
        Factor applied to each box's half-width (1 keeps the width unchanged).
    """
    # iterating through Axes instances
    for ax in g.axes:
        # iterating through axes artists:
        for c in ax.get_children():
            # only the box patches are of interest
            if not isinstance(c, PathPatch):
                continue

            # getting current width of box (last vertex is skipped, as in the
            # original recipe)
            verts_sub = c.get_path().vertices[:-1]
            xmin = np.min(verts_sub[:, 0])
            xmax = np.max(verts_sub[:, 0])
            xmid = 0.5 * (xmin + xmax)
            xhalf = 0.5 * (xmax - xmin)

            # setting new width of box
            xmin_new = xmid - fac * xhalf
            xmax_new = xmid + fac * xhalf
            verts_sub[verts_sub[:, 0] == xmin, 0] = xmin_new
            verts_sub[verts_sub[:, 0] == xmax, 0] = xmax_new

            # setting new width of median line
            for l in ax.lines:
                # array_equal is shape-safe: a plain `==` against a 2-element
                # list raises on NumPy >= 1.25 when the line's x-data has a
                # length that cannot broadcast (e.g. flier lines).
                if np.array_equal(l.get_xdata(), [xmin, xmax]):
                    l.set_xdata([xmin_new, xmax_new])
from itertools import cycle
from matplotlib.patches import Patch
from matplotlib.lines import Line2D
# Per-cluster boxplot comparing the absolute expression difference of the
# real pairs ('Diff') with that of the shuffled control ('Shuffle_Diff').
fig, ax = plt.subplots(figsize=(6, 3))

# Long format for seaborn. The value column must not reuse the name of an
# existing column ('Diff'): pandas >= 2.0 raises ValueError in that case.
# The y label is set explicitly below, so the column name is not shown.
_dfLong = df_1v1OnlyDiff.melt(
    id_vars="Cluster",
    value_vars=['Diff', 'Shuffle_Diff'],
    var_name='Group',
    value_name='AbsDiff',
)
sns.boxplot(data=_dfLong, x='Cluster', y='AbsDiff', hue='Group', fliersize=0, ax=ax)
plt.ylim(-1, 71)

# Restyle boxes as white-filled outlines, alternating the first two palette
# colours (real pairs, shuffled).
# NOTE(review): relies on `mpl` (matplotlib) being imported earlier in the
# notebook -- not visible in this section.
_lsColor = sns.color_palette()[:2]
_lsBox = [x for x in ax.get_children() if isinstance(x, mpl.patches.PathPatch)]
for i, (box, color) in enumerate(zip(_lsBox, cycle(_lsColor))):
    box.set_edgecolor(color)
    box.set_facecolor("white")
    # iterate over whiskers and median lines
    # (assumes 6 Line2D artists per box, in box order -- TODO confirm for the
    # installed seaborn version)
    for j in range(6 * i, 6 * (i + 1)):
        ax.lines[j].set_color(color)

# Manual legend matching the outline colours used above.
legend_elements = [
    Patch(facecolor='white', edgecolor=_lsColor[0], label='Duplicated Pairs'),
    Patch(facecolor='white', edgecolor=_lsColor[1], label='Shuffled'),
]
plt.legend(handles=legend_elements, loc='lower left', bbox_to_anchor=(1, 0.5), frameon=False, fontsize=12)
plt.ylabel('Absolute difference of expression\n(CPM$_{high}$ - CPM$_{low}$)', fontsize=12)
plt.xlabel("Cluster", fontsize=12)

# Narrow the boxes after recolouring, then remove the top/right spines.
adjust_box_widths(fig, 0.75)
sns.despine()
# Attach the per-cluster difference table to `ad`, drop two entries that are
# not wanted in the saved object (reason not visible in this section), then
# write `ad` out through the project's `toPkl` helper.
ad.uns['gene_dup_diff'] = df_1v1OnlyDiff
del ad.var['bins_ForPickMock']
del ad.obsm['seurat_integrated_data']
toPkl(ad, 'ad_gene_dup', 'ipf', dir_path=dir_result)
2022-11-01 20:15:36.728 | INFO | jpy_tools.otherTools:toPkl:477 - please run `loadPkl('ad_gene_dup', lambda **dt:sc.read_h5ad(**dt), arg_path='filename')` to get object
# UMAPs coloured by the 'normalize_log' expression of two genes; the panel
# titles (as set by scanpy) are re-applied in italics.
_lsGene = ['GLYMA_08G071200', 'GLYMA_13G043800']
axs = sc.pl.umap(
    ad,
    color=_lsGene,
    layer='normalize_log',
    cmap='Reds',
    size=10,
    show=False,
)
for ax in axs:
    # make the panel current so plt.title targets it
    plt.sca(ax)
    plt.title(ax.get_title(), fontstyle='italic')
# Per-cluster independent two-sample t-test of 'Diff' against 'Shuffle_Diff';
# the result is one p-value per cluster.
(
    df_1v1OnlyDiff
    .groupby('Cluster')
    .apply(
        lambda df: scipy.stats.ttest_ind(df['Diff'], df['Shuffle_Diff']).pvalue
    )
)
Cluster 0 1.463250e-04 1 1.085058e-07 10 2.282406e-16 11 2.502472e-03 12 3.519299e-05 13 3.588771e-13 14 1.731463e-13 2 3.281360e-10 3 3.579497e-32 4 2.973648e-13 5 1.416890e-10 6 3.381553e-09 7 2.221386e-07 8 4.559358e-16 9 2.118819e-14 dtype: float64